diff --git a/Extras/vectormathlibrary/LICENSE b/Extras/vectormathlibrary/LICENSE
index 476f22850..296fb8f3f 100644
--- a/Extras/vectormathlibrary/LICENSE
+++ b/Extras/vectormathlibrary/LICENSE
@@ -1,31 +1,31 @@
- Vector Math library for 3-D linear algebra (vector, matrix, quaternion)
-   SIMD support for SSE, PowerPC (PPU) and the SPU.
-   Also includes generic multi-platform scalar version. 
-
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
- 
+ Vector Math library for 3-D linear algebra (vector, matrix, quaternion)
+   SIMD support for SSE, PowerPC (PPU) and the SPU.
+   Also includes generic multi-platform scalar version. 
+
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+ 
diff --git a/Extras/vectormathlibrary/Makefile b/Extras/vectormathlibrary/Makefile
index 49b5523a3..e0afd1f0a 100644
--- a/Extras/vectormathlibrary/Makefile
+++ b/Extras/vectormathlibrary/Makefile
@@ -1,119 +1,119 @@
-# Makefile for vector math library.
-#
-#   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms,
-#   with or without modification, are permitted provided that the
-#   following conditions are met:
-#    * Redistributions of source code must retain the above copyright
-#      notice, this list of conditions and the following disclaimer.
-#    * Redistributions in binary form must reproduce the above copyright
-#      notice, this list of conditions and the following disclaimer in the
-#      documentation and/or other materials provided with the distribution.
-#    * Neither the name of the Sony Computer Entertainment Inc nor the names
-#      of its contributors may be used to endorse or promote products derived
-#      from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-#   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#   POSSIBILITY OF SUCH DAMAGE.
-
-# How to build:
-#
-#   Nothing to do (The all of the library is implemented as inline).
-#
-# How to install:
-#
-#   To install the library:
-#
-#      make ARCH=<ARCHITECTURE> install
-#
-#   where <ARCHITECTURE> must be one of:
-#
-#      ppu (PowerPC)
-#      spu
-#      SSE
-#      scalar (generic)
-#
-#   e.g.) make ARCH=cell install
-#
-
-topdir = .
-ARCH = scalar
-
-prefix_spu = /usr/spu
-
-ARCH_DIRS = $(ARCH)
-ARCH_INSTALL= $(ARCH_INSTALL_$(ARCH))
-ARCH_CHECK= $(ARCH_CHECK_$(ARCH))
-
-prefix = $(if $(prefix_$(ARCH)),$(prefix_$(ARCH)),/usr)
-DESTDIR =
-
-COMMON_DIRS = scalar
-
-INSTALL = install
-
-LIB_MAJOR_VERSION = 1
-LIB_MINOR_VERSION = 0
-LIB_RELEASE = 1
-LIB_FULL_VERSION = $(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION).$(LIB_RELEASE)
-
-LIB_BASE = vectormath
-
-TAR_NAME = $(LIB_BASE)-$(LIB_FULL_VERSION)
-TAR_BALL = $(TAR_NAME).tar.gz
-
-all:
-	@true
-
-install:
-	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/c
-	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/cpp
-	$(INSTALL) -m 644 include/vectormath/c/*.h $(DESTDIR)$(prefix)/include/vectormath/c/
-	$(INSTALL) -m 644 include/vectormath/cpp/*.h $(DESTDIR)$(prefix)/include/vectormath/cpp/
-	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/cpp
-	for _d in $(ARCH_DIRS) $(COMMON_DIRS); do \
-		if test -d include/vectormath/$$_d/c; then \
-			$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/$$_d/c && \
-			$(INSTALL) -m 644 include/vectormath/$$_d/c/*.h \
-				$(DESTDIR)$(prefix)/include/vectormath/$$_d/c/ || exit 1; \
-		fi; \
-		if test -d include/vectormath/$$_d/cpp; then \
-			$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/$$_d/cpp && \
-			$(INSTALL) -m 644 include/vectormath/$$_d/cpp/*.h \
-				$(DESTDIR)$(prefix)/include/vectormath/$$_d/cpp/ || exit 1; \
-		fi; \
-	done
-
-check:
-	$(MAKE) -C tests ARCH=$(ARCH) check
-
-clean:
-	$(MAKE) -C tests clean
-	-rm -f $(TAR_BALL)
-
-distclean:
-	$(MAKE) -C tests distclean
-
-dist:
-	-rm -rf .dist
-	mkdir -p .dist/$(TAR_NAME)
-	find . -name .dist -prune -o \
-		-name .CVS -prune -o -name .svn -prune -o \
-		-name .pc -prune  -o -name patches -prune -o \
-		'(' -name README -o -name LICENSE -o \
-			-name Makefile -o -name '*.[ch]' -o -name '*.cpp' -o \
-			-name '*.pl' -o -name '*.txt' -o -name '*.pdf' -o -name '*.spec' ')' \
-		-print | tar -T - -cf - | tar xf - -C .dist/$(TAR_NAME)
-	tar zcf $(TAR_BALL) -C .dist $(TAR_NAME)
-	-rm -rf .dist
+# Makefile for vector math library.
+#
+#   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms,
+#   with or without modification, are permitted provided that the
+#   following conditions are met:
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in the
+#      documentation and/or other materials provided with the distribution.
+#    * Neither the name of the Sony Computer Entertainment Inc nor the names
+#      of its contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+#   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#   POSSIBILITY OF SUCH DAMAGE.
+
+# How to build:
+#
+#   Nothing to do (the whole library is implemented inline in the headers).
+#
+# How to install:
+#
+#   To install the library:
+#
+#      make ARCH=<ARCHITECTURE> install
+#
+#   where <ARCHITECTURE> must be one of:
+#
+#      ppu (PowerPC)
+#      spu
+#      SSE
+#      scalar (generic)
+#
+#   e.g. make ARCH=ppu install
+#
+
+topdir = .
+ARCH = scalar
+
+prefix_spu = /usr/spu
+
+ARCH_DIRS = $(ARCH)
+ARCH_INSTALL= $(ARCH_INSTALL_$(ARCH))
+ARCH_CHECK= $(ARCH_CHECK_$(ARCH))
+
+prefix = $(if $(prefix_$(ARCH)),$(prefix_$(ARCH)),/usr)
+DESTDIR =
+
+COMMON_DIRS = scalar
+
+INSTALL = install
+
+LIB_MAJOR_VERSION = 1
+LIB_MINOR_VERSION = 0
+LIB_RELEASE = 1
+LIB_FULL_VERSION = $(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION).$(LIB_RELEASE)
+
+LIB_BASE = vectormath
+
+TAR_NAME = $(LIB_BASE)-$(LIB_FULL_VERSION)
+TAR_BALL = $(TAR_NAME).tar.gz
+.PHONY: all install check clean distclean dist
+all:
+	@true
+
+install:
+	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/c
+	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/cpp
+	$(INSTALL) -m 644 include/vectormath/c/*.h $(DESTDIR)$(prefix)/include/vectormath/c/
+	$(INSTALL) -m 644 include/vectormath/cpp/*.h $(DESTDIR)$(prefix)/include/vectormath/cpp/
+	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/cpp
+	for _d in $(ARCH_DIRS) $(COMMON_DIRS); do \
+		if test -d include/vectormath/$$_d/c; then \
+			$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/$$_d/c && \
+			$(INSTALL) -m 644 include/vectormath/$$_d/c/*.h \
+				$(DESTDIR)$(prefix)/include/vectormath/$$_d/c/ || exit 1; \
+		fi; \
+		if test -d include/vectormath/$$_d/cpp; then \
+			$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/$$_d/cpp && \
+			$(INSTALL) -m 644 include/vectormath/$$_d/cpp/*.h \
+				$(DESTDIR)$(prefix)/include/vectormath/$$_d/cpp/ || exit 1; \
+		fi; \
+	done
+
+check:
+	$(MAKE) -C tests ARCH=$(ARCH) check
+
+clean:
+	$(MAKE) -C tests clean
+	-rm -f $(TAR_BALL)
+
+distclean:
+	$(MAKE) -C tests distclean
+
+dist:
+	-rm -rf .dist
+	mkdir -p .dist/$(TAR_NAME)
+	find . -name .dist -prune -o \
+		-name .CVS -prune -o -name .svn -prune -o \
+		-name .pc -prune  -o -name patches -prune -o \
+		'(' -name README -o -name LICENSE -o \
+			-name Makefile -o -name '*.[ch]' -o -name '*.cpp' -o \
+			-name '*.pl' -o -name '*.txt' -o -name '*.pdf' -o -name '*.spec' ')' \
+		-print | tar -T - -cf - | tar xf - -C .dist/$(TAR_NAME)
+	tar zcf $(TAR_BALL) -C .dist $(TAR_NAME)
+	-rm -rf .dist
diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h
index d18cb15ce..d21d25cbb 100644
--- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/boolInVec.h
@@ -1,247 +1,247 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _BOOLINVEC_H
-#define _BOOLINVEC_H
-
-#include <math.h>
-
-namespace Vectormath {
-
-class floatInVec;
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec class
-//
-
-class boolInVec
-{
-    private:
-        __m128 mData;
-
-        inline boolInVec(__m128 vec);
-    public:
-        inline boolInVec() {}
-
-        // matches standard type conversions
-        //
-        inline boolInVec(const floatInVec &vec);
-
-        // explicit cast from bool
-        //
-        explicit inline boolInVec(bool scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to bool
-        // 
-        inline bool getAsBool() const;
-#else
-        // implicit cast to bool
-        // 
-        inline operator bool() const;
-#endif
-        
-        // get vector data
-        // bool value is splatted across all word slots of vector as 0 (false) or -1 (true)
-        //
-        inline __m128 get128() const;
-
-        // operators
-        //
-        inline const boolInVec operator ! () const;
-        inline boolInVec& operator = (const boolInVec &vec);
-        inline boolInVec& operator &= (const boolInVec &vec);
-        inline boolInVec& operator ^= (const boolInVec &vec);
-        inline boolInVec& operator |= (const boolInVec &vec);
-
-        // friend functions
-        //
-        friend inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1);
-        friend inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1);
-        friend inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1);
-        friend inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1);
-        friend inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1);
-        friend inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec functions
-//
-
-// operators
-//
-inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1);
-inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1);
-inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1);
-inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1);
-inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec implementation
-//
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-
-inline
-boolInVec::boolInVec(__m128 vec)
-{
-    mData = vec;
-}
-
-inline
-boolInVec::boolInVec(const floatInVec &vec)
-{
-    *this = (vec != floatInVec(0.0f));
-}
-
-inline
-boolInVec::boolInVec(bool scalar)
-{
-    unsigned int mask = -(int)scalar;
-	mData = _mm_set1_ps(*(float *)&mask); // TODO: Union
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-bool
-boolInVec::getAsBool() const
-#else
-inline
-boolInVec::operator bool() const
-#endif
-{
-	return *(bool *)&mData;
-}
-
-inline
-__m128
-boolInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const boolInVec
-boolInVec::operator ! () const
-{
-    return boolInVec(_mm_andnot_ps(mData, _mm_cmpneq_ps(_mm_setzero_ps(),_mm_setzero_ps())));
-}
-
-inline
-boolInVec&
-boolInVec::operator = (const boolInVec &vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator &= (const boolInVec &vec)
-{
-    *this = *this & vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator ^= (const boolInVec &vec)
-{
-    *this = *this ^ vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator |= (const boolInVec &vec)
-{
-    *this = *this | vec;
-    return *this;
-}
-
-inline
-const boolInVec
-operator == (const boolInVec &vec0, const boolInVec &vec1)
-{
-	return boolInVec(_mm_cmpeq_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (const boolInVec &vec0, const boolInVec &vec1)
-{
-	return boolInVec(_mm_cmpneq_ps(vec0.get128(), vec1.get128()));
-}
-    
-inline
-const boolInVec
-operator & (const boolInVec &vec0, const boolInVec &vec1)
-{
-	return boolInVec(_mm_and_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator | (const boolInVec &vec0, const boolInVec &vec1)
-{
-	return boolInVec(_mm_or_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator ^ (const boolInVec &vec0, const boolInVec &vec1)
-{
-	return boolInVec(_mm_xor_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1)
-{
-	return boolInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
- 
-} // namespace Vectormath
-
-#endif // boolInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec
+{
+    private:
+        __m128 mData;
+
+        inline boolInVec(__m128 vec);
+    public:
+        inline boolInVec() {}
+
+        // matches standard type conversions
+        //
+        inline boolInVec(const floatInVec &vec);
+
+        // explicit cast from bool
+        //
+        explicit inline boolInVec(bool scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to bool
+        // 
+        inline bool getAsBool() const;
+#else
+        // implicit cast to bool
+        // 
+        inline operator bool() const;
+#endif
+        
+        // get vector data
+        // bool value is splatted across all word slots of vector as 0 (false) or -1 (true)
+        //
+        inline __m128 get128() const;
+
+        // operators
+        //
+        inline const boolInVec operator ! () const;
+        inline boolInVec& operator = (const boolInVec &vec);
+        inline boolInVec& operator &= (const boolInVec &vec);
+        inline boolInVec& operator ^= (const boolInVec &vec);
+        inline boolInVec& operator |= (const boolInVec &vec);
+
+        // friend functions
+        //
+        friend inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec functions
+//
+
+// operators
+//
+inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+inline
+boolInVec::boolInVec(__m128 vec)
+{
+    mData = vec;
+}
+
+inline
+boolInVec::boolInVec(const floatInVec &vec)
+{
+    *this = (vec != floatInVec(0.0f));
+}
+
+inline
+boolInVec::boolInVec(bool scalar)
+{
+    // Build the 0 / all-ones lane mask with a compare instead of punning an int through float* (aliasing UB).
+    mData = _mm_cmpneq_ps(_mm_set1_ps(scalar ? 1.0f : 0.0f), _mm_setzero_ps());
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+bool
+boolInVec::getAsBool() const
+#else
+inline
+boolInVec::operator bool() const
+#endif
+{
+	return (_mm_movemask_ps(mData) & 1) != 0; // lane 0 sign bit is set iff mask is -1 (true); avoids reading a 0xFF byte as bool
+}
+
+inline
+__m128
+boolInVec::get128() const
+{
+    return mData;
+}
+
+inline
+const boolInVec
+boolInVec::operator ! () const
+{
+    return boolInVec(_mm_andnot_ps(mData, _mm_cmpeq_ps(_mm_setzero_ps(),_mm_setzero_ps()))); // ~mData & all-ones (0 == 0 sets every bit)
+}
+
+inline
+boolInVec&
+boolInVec::operator = (const boolInVec &vec)
+{
+    mData = vec.mData;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator &= (const boolInVec &vec)
+{
+    *this = *this & vec;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator ^= (const boolInVec &vec)
+{
+    *this = *this ^ vec;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator |= (const boolInVec &vec)
+{
+    *this = *this | vec;
+    return *this;
+}
+
+inline
+const boolInVec
+operator == (const boolInVec &vec0, const boolInVec &vec1)
+{
+	return boolInVec(_mm_cmpeq_ps(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator != (const boolInVec &vec0, const boolInVec &vec1)
+{
+	return boolInVec(_mm_cmpneq_ps(vec0.get128(), vec1.get128()));
+}
+    
+inline
+const boolInVec
+operator & (const boolInVec &vec0, const boolInVec &vec1)
+{
+	return boolInVec(_mm_and_ps(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator | (const boolInVec &vec0, const boolInVec &vec1)
+{
+	return boolInVec(_mm_or_ps(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator ^ (const boolInVec &vec0, const boolInVec &vec1)
+{
+	return boolInVec(_mm_xor_ps(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1)
+{
+	return boolInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
+}
+ 
+} // namespace Vectormath
+
+#endif // boolInVec_h
diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h
index 6443865b1..e8ac5959e 100644
--- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/floatInVec.h
@@ -1,340 +1,340 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _FLOATINVEC_H
-#define _FLOATINVEC_H
-
-#include <math.h>
-#include <xmmintrin.h>
-
-namespace Vectormath {
-
-class boolInVec;
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec class
-//
-
-class floatInVec
-{
-    private:
-        __m128 mData;
-
-    public:
-        inline floatInVec(__m128 vec);
-
-        inline floatInVec() {}
-
-        // matches standard type conversions
-        //
-        inline floatInVec(const boolInVec &vec);
-
-        // construct from a slot of __m128
-        //
-        inline floatInVec(__m128 vec, int slot);
-        
-        // explicit cast from float
-        //
-        explicit inline floatInVec(float scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to float
-        // 
-        inline float getAsFloat() const;
-#else
-        // implicit cast to float
-        //
-        inline operator float() const;
-#endif
-
-        // get vector data
-        // float value is splatted across all word slots of vector
-        //
-        inline __m128 get128() const;
-
-        // operators
-        // 
-        inline const floatInVec operator ++ (int);
-        inline const floatInVec operator -- (int);
-        inline floatInVec& operator ++ ();
-        inline floatInVec& operator -- ();
-        inline const floatInVec operator - () const;
-        inline floatInVec& operator = (const floatInVec &vec);
-        inline floatInVec& operator *= (const floatInVec &vec);
-        inline floatInVec& operator /= (const floatInVec &vec);
-        inline floatInVec& operator += (const floatInVec &vec);
-        inline floatInVec& operator -= (const floatInVec &vec);
-
-        // friend functions
-        //
-        friend inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1);
-        friend inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec functions
-//
-
-// operators
-// 
-inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1);
-inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1);
-inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1);
-inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1);
-inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1);
-inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1);
-inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1);
-inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1);
-inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1);
-inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec implementation
-//
-
-#include "boolInVec.h"
-
-namespace Vectormath {
-
-inline
-floatInVec::floatInVec(__m128 vec)
-{
-    mData = vec;
-}
-
-inline
-floatInVec::floatInVec(const boolInVec &vec)
-{
-	mData = vec_sel(_mm_setzero_ps(), _mm_set1_ps(1.0f), vec.get128());
-}
-
-inline
-floatInVec::floatInVec(__m128 vec, int slot)
-{
-	SSEFloat v;
-	v.m128 = vec;
-	mData = _mm_set1_ps(v.f[slot]);
-}
-
-inline
-floatInVec::floatInVec(float scalar)
-{
-	mData = _mm_set1_ps(scalar);
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-float
-floatInVec::getAsFloat() const
-#else
-inline
-floatInVec::operator float() const
-#endif
-{
-    return *((float *)&mData);
-}
-
-inline
-__m128
-floatInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const floatInVec
-floatInVec::operator ++ (int)
-{
-    __m128 olddata = mData;
-    operator ++();
-    return floatInVec(olddata);
-}
-
-inline
-const floatInVec
-floatInVec::operator -- (int)
-{
-    __m128 olddata = mData;
-    operator --();
-    return floatInVec(olddata);
-}
-
-inline
-floatInVec&
-floatInVec::operator ++ ()
-{
-    *this += floatInVec(_mm_set1_ps(1.0f));
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -- ()
-{
-    *this -= floatInVec(_mm_set1_ps(1.0f));
-    return *this;
-}
-
-inline
-const floatInVec
-floatInVec::operator - () const
-{
-    return floatInVec(_mm_sub_ps(_mm_setzero_ps(), mData));
-}
-
-inline
-floatInVec&
-floatInVec::operator = (const floatInVec &vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator *= (const floatInVec &vec)
-{
-    *this = *this * vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator /= (const floatInVec &vec)
-{
-    *this = *this / vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator += (const floatInVec &vec)
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -= (const floatInVec &vec)
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline
-const floatInVec
-operator * (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return floatInVec(_mm_mul_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const floatInVec
-operator / (const floatInVec &num, const floatInVec &den)
-{
-    return floatInVec(_mm_div_ps(num.get128(), den.get128()));
-}
-
-inline
-const floatInVec
-operator + (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return floatInVec(_mm_add_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const floatInVec
-operator - (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return floatInVec(_mm_sub_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator < (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return boolInVec(_mm_cmpgt_ps(vec1.get128(), vec0.get128()));
-}
-
-inline
-const boolInVec
-operator <= (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return boolInVec(_mm_cmpge_ps(vec1.get128(), vec0.get128()));
-}
-
-inline
-const boolInVec
-operator > (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return boolInVec(_mm_cmpgt_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator >= (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return boolInVec(_mm_cmpge_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator == (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return boolInVec(_mm_cmpeq_ps(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (const floatInVec &vec0, const floatInVec &vec1)
-{
-    return boolInVec(_mm_cmpneq_ps(vec0.get128(), vec1.get128()));
-}
-    
-inline
-const floatInVec
-select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1)
-{
-    return floatInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
-
-} // namespace Vectormath
-
-#endif // floatInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+#include <xmmintrin.h>
+
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+class floatInVec
+{
+    private:
+        __m128 mData;
+
+    public:
+        // wrap an SSE register as-is
+        inline floatInVec(__m128 vec);
+        // default construction leaves mData uninitialized
+        inline floatInVec() {}
+        // matches standard type conversions:
+        // true becomes 1.0f, false becomes 0.0f
+        inline floatInVec(const boolInVec &vec);
+
+        // construct from a slot of __m128:
+        // the chosen element is copied into every slot
+        inline floatInVec(__m128 vec, int slot);
+
+        // explicit cast from float
+        // (the scalar is copied into every slot)
+        explicit inline floatInVec(float scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to float
+        // (reads the first slot)
+        inline float getAsFloat() const;
+#else
+        // implicit cast to float
+        // (reads the first slot)
+        inline operator float() const;
+#endif
+
+        // get vector data
+        // float value is splatted across all word slots of vector
+        //
+        inline __m128 get128() const;
+
+        // operators
+        // (++ and -- step the value by 1.0f)
+        inline const floatInVec operator ++ (int);
+        inline const floatInVec operator -- (int);
+        inline floatInVec& operator ++ ();
+        inline floatInVec& operator -- ();
+        inline const floatInVec operator - () const;
+        inline floatInVec& operator = (const floatInVec &vec);
+        inline floatInVec& operator *= (const floatInVec &vec);
+        inline floatInVec& operator /= (const floatInVec &vec);
+        inline floatInVec& operator += (const floatInVec &vec);
+        inline floatInVec& operator -= (const floatInVec &vec);
+
+        // friend functions
+        // (select takes the mask by const reference so it names the overload that is actually defined)
+        friend inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec functions
+//
+
+// operators
+// 
+inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+// Store the given SSE register unchanged.
+inline floatInVec::floatInVec(__m128 vec)
+    : mData(vec)
+{
+}
+
+// Build from a boolean mask: vec_sel picks between 0.0f and 1.0f using vec's bits.
+inline floatInVec::floatInVec(const boolInVec &vec)
+    : mData(vec_sel(_mm_setzero_ps(), _mm_set1_ps(1.0f), vec.get128()))
+{
+}
+
+// Broadcast element `slot` of vec to every lane; slot is not range-checked here.
+inline floatInVec::floatInVec(__m128 vec, int slot)
+{
+    SSEFloat lanes;                  // project type with m128 and f[] members
+    lanes.m128 = vec;
+    mData = _mm_set1_ps(lanes.f[slot]);
+}
+
+// Broadcast a scalar float to all four lanes.
+inline floatInVec::floatInVec(float scalar)
+    : mData(_mm_set1_ps(scalar))
+{
+}
+
+// Scalar read-back: returns the first float stored in mData.
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline float
+floatInVec::getAsFloat() const
+#else
+inline
+floatInVec::operator float() const
+#endif
+{
+    return *reinterpret_cast<const float *>(&mData);
+}
+
+// Expose the raw 128-bit register.
+inline __m128
+floatInVec::get128() const
+{
+    return this->mData;
+}
+
+// Postfix increment: bump the value and hand back what it was beforehand.
+inline const floatInVec
+floatInVec::operator ++ (int)
+{
+    const floatInVec previous(mData);
+    ++(*this);
+    return previous;
+}
+
+// Postfix decrement: lower the value and hand back what it was beforehand.
+inline const floatInVec
+floatInVec::operator -- (int)
+{
+    const floatInVec previous(mData);
+    --(*this);
+    return previous;
+}
+
+// Prefix increment: adds 1.0f in every lane via operator +=.
+inline floatInVec&
+floatInVec::operator ++ ()
+{
+    const floatInVec one(_mm_set1_ps(1.0f));
+    return *this += one;
+}
+
+// Prefix decrement: subtracts 1.0f in every lane via operator -=.
+inline floatInVec&
+floatInVec::operator -- ()
+{
+    const floatInVec one(_mm_set1_ps(1.0f));
+    return *this -= one;
+}
+
+// Negation, computed as (0 - value) in each lane.
+inline const floatInVec
+floatInVec::operator - () const
+{
+    return floatInVec(_mm_sub_ps(_mm_setzero_ps(), get128()));
+}
+
+// Copy assignment: adopt vec's register contents.
+inline floatInVec&
+floatInVec::operator = (const floatInVec &vec)
+{
+    mData = vec.get128();
+    return *this;
+}
+
+// In-place multiply: stores (*this * vec) back into this object.
+inline floatInVec&
+floatInVec::operator *= (const floatInVec &vec)
+{
+    const floatInVec product = *this * vec;
+    return *this = product;
+}
+
+// In-place divide: stores (*this / vec) back into this object.
+inline floatInVec&
+floatInVec::operator /= (const floatInVec &vec)
+{
+    const floatInVec quotient = *this / vec;
+    return *this = quotient;
+}
+
+// In-place add: stores (*this + vec) back into this object.
+inline floatInVec&
+floatInVec::operator += (const floatInVec &vec)
+{
+    const floatInVec sum = *this + vec;
+    return *this = sum;
+}
+
+// In-place subtract: stores (*this - vec) back into this object.
+inline floatInVec&
+floatInVec::operator -= (const floatInVec &vec)
+{
+    const floatInVec difference = *this - vec;
+    return *this = difference;
+}
+
+// Lane-wise product of the two operands.
+inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return floatInVec(_mm_mul_ps(lhs, rhs));
+}
+
+// Lane-wise quotient num / den.
+inline const floatInVec operator / (const floatInVec &num, const floatInVec &den)
+{
+    const __m128 numerator = num.get128(), denominator = den.get128();
+    return floatInVec(_mm_div_ps(numerator, denominator));
+}
+
+// Lane-wise sum of the two operands.
+inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return floatInVec(_mm_add_ps(lhs, rhs));
+}
+
+// Lane-wise difference vec0 - vec1.
+inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return floatInVec(_mm_sub_ps(lhs, rhs));
+}
+
+// Less-than, expressed as (vec1 > vec0) with the SSE greater-than compare.
+inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(_mm_cmpgt_ps(rhs, lhs));
+}
+
+// Less-or-equal, expressed as (vec1 >= vec0) with the SSE greater-or-equal compare.
+inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(_mm_cmpge_ps(rhs, lhs));
+}
+
+// Greater-than compare; the result mask is wrapped in a boolInVec.
+inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(_mm_cmpgt_ps(lhs, rhs));
+}
+
+// Greater-or-equal compare; the result mask is wrapped in a boolInVec.
+inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(_mm_cmpge_ps(lhs, rhs));
+}
+
+// Floating-point equality compare; the result mask is wrapped in a boolInVec.
+inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(_mm_cmpeq_ps(lhs, rhs));
+}
+
+// Floating-point inequality compare; the result mask is wrapped in a boolInVec.
+inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(_mm_cmpneq_ps(lhs, rhs));
+}
+    
+// Chooses between vec0 and vec1 under select_vec1 by forwarding all three registers to vec_sel (defined elsewhere).
+inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1)
+{
+    const __m128 mask = select_vec1.get128();
+    return floatInVec(vec_sel(vec0.get128(), vec1.get128(), mask));
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h
index 6bdc5b3fa..92cbdc45a 100644
--- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/mat_aos.h
@@ -1,2187 +1,2187 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#ifndef _VECTORMATH_MAT_AOS_CPP_H
-#define _VECTORMATH_MAT_AOS_CPP_H
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
-#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
-#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
-#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const floatInVec &scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const Quat &unitQuat )
-{
-    __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-	__declspec(align(16)) unsigned int sx[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int sz[4] = {0, 0, 0xffffffff, 0};
-	__m128 select_x = _mm_load_ps((float *)sx);
-	__m128 select_z = _mm_load_ps((float *)sz);
-
-    xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() );
-    wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) );
-	yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) );
-	zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) );
-    yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) );
-    zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) );
-
-    tmp0 = _mm_mul_ps( yzxw_2, wwww );									// tmp0 = 2yw, 2zw, 2xw, 2w2
-	tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) );	// tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2
-    tmp2 = _mm_mul_ps( yzxw, xyzw_2 );									// tmp2 = 2xy, 2yz, 2xz, 2w2
-    tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 );				// tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2
-    tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) );				// tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2
-    tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) );				// tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2
-
-    tmp3 = vec_sel( tmp0, tmp1, select_x );
-    tmp4 = vec_sel( tmp1, tmp2, select_x );
-    tmp5 = vec_sel( tmp2, tmp0, select_x );
-    mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
-    mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
-    mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
-}
-
-inline Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3 & Matrix3::setCol0( const Vector3 &_col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( const Vector3 &_col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( const Vector3 &_col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, const Vector3 &vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, const Vector3 &vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    __m128 tmp0, tmp1, res0, res1, res2;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
-    //res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
-	res1 = vec_sel(res1, mat.getCol1().get128(), select_y);
-    //res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-	res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
-	res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y);
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
-    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
-    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
-    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
-	inv1 = vec_sel(inv1, tmp1, select_y);
-    //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-	inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
-	inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
-    inv0 = vec_mul( inv0, invdet );
-    inv1 = vec_mul( inv1, invdet );
-	inv2 = vec_mul( inv2, invdet );
-    return Matrix3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 )
-    );
-}
-
-inline const floatInVec determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline Matrix3 & Matrix3::operator *=( const floatInVec &scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
-{
-    return floatInVec(scalar) * mat;
-}
-
-inline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( const Vector3 &vec ) const
-{
-    __m128 res;
-    __m128 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_mul( mCol0.get128(), xxxx );
-    res = vec_madd( mCol1.get128(), yyyy, res );
-    res = vec_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationX( const floatInVec &radians )
-{
-    __m128 s, c, res1, res2;
-    __m128 zero;
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationY( const floatInVec &radians )
-{
-    __m128 s, c, res0, res2;
-    __m128 zero;
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 )
-	);
-}
-
-inline const Matrix3 Matrix3::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationZ( const floatInVec &radians )
-{
-    __m128 s, c, res0, res1;
-    __m128 zero;
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-    zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( )
-	);
-}
-
-inline const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ )
-{
-    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-	__declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
-    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
-	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
-	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_mul( Z0, Y1 );
-    return Matrix3(
-        Vector3( vec_mul( Z0, Y0 ) ),
-        Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
-        Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec )
-{
-    __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    axis = unitVec.get128();
-    sincosf4( radians.get128(), &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
-    axisS = vec_mul( axis, s );
-    negAxisS = negatef4( axisS );
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-	tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
-	tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
-    //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-	tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
-    //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-	tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
-	tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
-    tmp0 = vec_sel( tmp0, c, select_x );
-    tmp1 = vec_sel( tmp1, c, select_y );
-    tmp2 = vec_sel( tmp2, c, select_z );
-    return Matrix3(
-        Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
-        Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
-        Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( const Quat &unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( const Vector3 &scaleVec )
-{
-    __m128 zero = _mm_setzero_ps();
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    return Matrix3(
-        Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), select_z ) )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( float scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const floatInVec &scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( mat.getCol3(), 1.0f );
-}
-
-inline Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4 & Matrix4::setCol0( const Vector4 &_col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( const Vector4 &_col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( const Vector4 &_col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( const Vector4 &_col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, const Vector4 &vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, const Vector4 &vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    __m128 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() );
-    tmp2 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp3 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() );
-    res0 = vec_mergeh( tmp0, tmp1 );
-    res1 = vec_mergel( tmp0, tmp1 );
-    res2 = vec_mergeh( tmp2, tmp3 );
-    res3 = vec_mergel( tmp2, tmp3 );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4( res3 )
-    );
-}
-
-// TODO: Tidy
-static __declspec(align(16)) const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
-static __declspec(align(16)) const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000};
-static __declspec(align(16)) const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f};
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-	__m128 Va,Vb,Vc;
-	__m128 r1,r2,r3,tt,tt2;
-	__m128 sum,Det,RDet;
-	__m128 trns0,trns1,trns2,trns3;
-
-	__m128 _L1 = mat.getCol0().get128();
-	__m128 _L2 = mat.getCol1().get128();
-	__m128 _L3 = mat.getCol2().get128();
-	__m128 _L4 = mat.getCol3().get128();
-	// Calculating the minterms for the first line.
-
-	// _mm_ror_ps is just a macro using _mm_shuffle_ps().
-	tt = _L4; tt2 = _mm_ror_ps(_L3,1); 
-	Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));					// V3'�V4
-	Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));					// V3'�V4"
-	Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));					// V3'�V4^
-
-	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V3"�V4^ - V3^�V4"
-	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V3^�V4' - V3'�V4^
-	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V3'�V4" - V3"�V4'
-
-	tt = _L2;
-	Va = _mm_ror_ps(tt,1);		sum = _mm_mul_ps(Va,r1);
-	Vb = _mm_ror_ps(tt,2);		sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
-	Vc = _mm_ror_ps(tt,3);		sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
-
-	// Calculating the determinant.
-	Det = _mm_mul_ps(sum,_L1);
-	Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
-
-	const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN);
-	const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP);
-
-	__m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN);
-
-	// Calculating the minterms of the second line (using previous results).
-	tt = _mm_ror_ps(_L1,1);		sum = _mm_mul_ps(tt,r1);
-	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
-	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
-	__m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP);
-
-	// Testing the determinant.
-	Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
-
-	// Calculating the minterms of the third line.
-	tt = _mm_ror_ps(_L1,1);
-	Va = _mm_mul_ps(tt,Vb);									// V1'�V2"
-	Vb = _mm_mul_ps(tt,Vc);									// V1'�V2^
-	Vc = _mm_mul_ps(tt,_L2);								// V1'�V2
-
-	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V1"�V2^ - V1^�V2"
-	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V1^�V2' - V1'�V2^
-	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V1'�V2" - V1"�V2'
-
-	tt = _mm_ror_ps(_L4,1);		sum = _mm_mul_ps(tt,r1);
-	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
-	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
-	__m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN);
-
-	// Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs).
-	RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f?
-	RDet = _mm_shuffle_ps(RDet,RDet,0x00);
-
-	// Devide the first 12 minterms with the determinant.
-	mtL1 = _mm_mul_ps(mtL1, RDet);
-	mtL2 = _mm_mul_ps(mtL2, RDet);
-	mtL3 = _mm_mul_ps(mtL3, RDet);
-
-	// Calculate the minterms of the forth line and devide by the determinant.
-	tt = _mm_ror_ps(_L3,1);		sum = _mm_mul_ps(tt,r1);
-	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
-	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
-	__m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP);
-	mtL4 = _mm_mul_ps(mtL4, RDet);
-
-	// Now we just have to transpose the minterms matrix.
-	trns0 = _mm_unpacklo_ps(mtL1,mtL2);
-	trns1 = _mm_unpacklo_ps(mtL3,mtL4);
-	trns2 = _mm_unpackhi_ps(mtL1,mtL2);
-	trns3 = _mm_unpackhi_ps(mtL3,mtL4);
-	_L1 = _mm_movelh_ps(trns0,trns1);
-	_L2 = _mm_movehl_ps(trns1,trns0);
-	_L3 = _mm_movelh_ps(trns2,trns3);
-	_L4 = _mm_movehl_ps(trns3,trns2);
-
-    return Matrix4(
-        Vector4( _L1 ),
-        Vector4( _L2 ),
-        Vector4( _L3 ),
-        Vector4( _L4 )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline const floatInVec determinant( const Matrix4 & mat )
-{
-	__m128 Va,Vb,Vc;
-	__m128 r1,r2,r3,tt,tt2;
-	__m128 sum,Det;
-
-	__m128 _L1 = mat.getCol0().get128();
-	__m128 _L2 = mat.getCol1().get128();
-	__m128 _L3 = mat.getCol2().get128();
-	__m128 _L4 = mat.getCol3().get128();
-	// Calculating the minterms for the first line.
-
-	// _mm_ror_ps is just a macro using _mm_shuffle_ps().
-	tt = _L4; tt2 = _mm_ror_ps(_L3,1); 
-	Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));					// V3'�V4
-	Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));					// V3'�V4"
-	Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));					// V3'�V4^
-
-	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V3"�V4^ - V3^�V4"
-	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V3^�V4' - V3'�V4^
-	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V3'�V4" - V3"�V4'
-
-	tt = _L2;
-	Va = _mm_ror_ps(tt,1);		sum = _mm_mul_ps(Va,r1);
-	Vb = _mm_ror_ps(tt,2);		sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
-	Vc = _mm_ror_ps(tt,3);		sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
-
-	// Calculating the determinant.
-	Det = _mm_mul_ps(sum,_L1);
-	Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
-
-	// Calculating the minterms of the second line (using previous results).
-	tt = _mm_ror_ps(_L1,1);		sum = _mm_mul_ps(tt,r1);
-	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
-	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
-
-	// Testing the determinant.
-	Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
-	return floatInVec(Det, 0);
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline Matrix4 & Matrix4::operator *=( const floatInVec &scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
-{
-    return floatInVec(scalar) * mat;
-}
-
-inline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( const Vector4 &vec ) const
-{
-    return Vector4(
-		_mm_add_ps(
-			_mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
-			_mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))), _mm_mul_ps(mCol3.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(3,3,3,3)))))
-		);
-}
-
-inline const Vector4 Matrix4::operator *( const Vector3 &vec ) const
-{
-    return Vector4(
-		_mm_add_ps(
-			_mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
-			_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))))
-		);
-}
-
-inline const Vector4 Matrix4::operator *( const Point3 &pnt ) const
-{
-    return Vector4(
-		_mm_add_ps(
-			_mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(1,1,1,1)))),
-			_mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(2,2,2,2))), mCol3.get128()))
-		);
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationX( const floatInVec &radians )
-{
-    __m128 s, c, res1, res2;
-    __m128 zero;
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationY( const floatInVec &radians )
-{
-    __m128 s, c, res0, res2;
-    __m128 zero;
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4::yAxis( ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationZ( const floatInVec &radians )
-{
-    __m128 s, c, res0, res1;
-    __m128 zero;
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-    zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ )
-{
-    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-	__declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
-    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
-	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
-	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_mul( Z0, Y1 );
-    return Matrix4(
-        Vector4( vec_mul( Z0, Y0 ) ),
-        Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
-        Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec )
-{
-    __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    axis = unitVec.get128();
-    sincosf4( radians.get128(), &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
-    axisS = vec_mul( axis, s );
-    negAxisS = negatef4( axisS );
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-	tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
-	tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
-    //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-	tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
-    //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-	tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
-	tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
-    tmp0 = vec_sel( tmp0, c, select_x );
-    tmp1 = vec_sel( tmp1, c, select_y );
-    tmp2 = vec_sel( tmp2, c, select_z );
-	__declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
-    axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) );
-    tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) );
-    tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) );
-    tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) );
-    return Matrix4(
-        Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
-        Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
-        Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( const Quat &unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( const Vector3 &scaleVec )
-{
-    __m128 zero = _mm_setzero_ps();
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    return Matrix4(
-        Vector4( vec_sel( zero, scaleVec.get128(), select_x ) ),
-        Vector4( vec_sel( zero, scaleVec.get128(), select_y ) ),
-        Vector4( vec_sel( zero, scaleVec.get128(), select_z ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, 1.0f );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( const Vector3 &translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, 1.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    __m128 zero, col0, col1, col2, col3;
-    union { __m128 v; float s[4]; } tmp;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = _mm_setzero_ps();
-    tmp.v = zero;
-    tmp.s[0] = f / aspect;
-    col0 = tmp.v;
-    tmp.v = zero;
-    tmp.s[1] = f;
-    col1 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = ( zNear + zFar ) * rangeInv;
-    tmp.s[3] = -1.0f;
-    col2 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
-    col3 = tmp.v;
-    return Matrix4(
-        Vector4( col0 ),
-        Vector4( col1 ),
-        Vector4( col2 ),
-        Vector4( col3 )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    __m128 lbf, rtn;
-    __m128 diff, sum, inv_diff;
-    __m128 diagonal, column, near2;
-    __m128 zero = _mm_setzero_ps();
-    union { __m128 v; float s[4]; } l, f, r, n, b, t; // TODO: Union?
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = vec_splat( n.v, 0 );
-    near2 = vec_add( near2, near2 );
-    diagonal = vec_mul( near2, inv_diff );
-    column = vec_mul( sum, inv_diff );
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-	__declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
-    return Matrix4(
-        Vector4( vec_sel( zero, diagonal, select_x ) ),
-        Vector4( vec_sel( zero, diagonal, select_y ) ),
-        Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ),
-        Vector4( vec_sel( zero, vec_mul( diagonal, vec_splat( f.v, 0 ) ), select_z ) )
-	);
-}
-
-inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    __m128 lbf, rtn;
-    __m128 diff, sum, inv_diff, neg_inv_diff;
-    __m128 diagonal, column;
-    __m128 zero = _mm_setzero_ps();
-    union { __m128 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = vec_add( inv_diff, inv_diff );
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-	__declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
-    column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero
-    return Matrix4(
-        Vector4( vec_sel( zero, diagonal, select_x ) ),
-        Vector4( vec_sel( zero, diagonal, select_y ) ),
-        Vector4( vec_sel( zero, diagonal, select_z ) ),
-        Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-    print( mat.getRow( 3 ) );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const floatInVec &scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3 & Transform3::setCol0( const Vector3 &_col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( const Vector3 &_col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( const Vector3 &_col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( const Vector3 &_col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, const Vector3 &vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, const Vector4 &vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, const floatInVec &val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    __m128 inv0, inv1, inv2, inv3;
-    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    __m128 xxxx, yyyy, zzzz;
-    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
-    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
-    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    xxxx = vec_splat( inv3, 0 );
-    //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
-	inv1 = vec_sel(inv1, tmp1, select_y);
-    //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-	inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
-	inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_mul( inv0, xxxx );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    inv0 = vec_mul( inv0, invdet );
-    inv1 = vec_mul( inv1, invdet );
-    inv2 = vec_mul( inv2, invdet );
-    inv3 = vec_mul( inv3, invdet );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    __m128 inv0, inv1, inv2, inv3;
-    __m128 tmp0, tmp1;
-    __m128 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
-    tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
-    xxxx = vec_splat( inv3, 0 );
-    //inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
-	inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y);
-    //inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-	inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
-	inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y);
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_mul( inv0, xxxx );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( const Vector3 &vec ) const
-{
-    __m128 res;
-    __m128 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_mul( mCol0.get128(), xxxx );
-    res = vec_madd( mCol1.get128(), yyyy, res );
-    res = vec_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Point3 Transform3::operator *( const Point3 &pnt ) const
-{
-    __m128 tmp0, tmp1, res;
-    __m128 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( pnt.get128(), 0 );
-    yyyy = vec_splat( pnt.get128(), 1 );
-    zzzz = vec_splat( pnt.get128(), 2 );
-    tmp0 = vec_mul( mCol0.get128(), xxxx );
-    tmp1 = vec_mul( mCol1.get128(), yyyy );
-    tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = vec_add( mCol3.get128(), tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    return Point3( res );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( const Vector3 &translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationX( const floatInVec &radians )
-{
-    __m128 s, c, res1, res2;
-    __m128 zero;
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 ),
-        Vector3( _mm_setzero_ps() )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationY( const floatInVec &radians )
-{
-    __m128 s, c, res0, res2;
-    __m128 zero;
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationZ( const floatInVec &radians )
-{
-    __m128 s, c, res0, res1;
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-    __m128 zero = _mm_setzero_ps();
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ )
-{
-    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-	__declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
-    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
-	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
-	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_mul( Z0, Y1 );
-    return Transform3(
-        Vector3( vec_mul( Z0, Y0 ) ),
-        Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
-        Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::rotation( const Quat &unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::scale( const Vector3 &scaleVec )
-{
-    __m128 zero = _mm_setzero_ps();
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    return Transform3(
-        Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( const Vector3 &translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    print( tfrm.getRow( 0 ) );
-    print( tfrm.getRow( 1 ) );
-    print( tfrm.getRow( 2 ) );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    __m128 res;
-    __m128 col0, col1, col2;
-    __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    __m128 zy_xz_yx, yz_zx_xy, sum, diff;
-    __m128 radicand, invSqrt, scale;
-    __m128 res0, res1, res2, res3;
-    __m128 xx, yy, zz;
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-	__declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
-
-    col0 = tfrm.getCol0().get128();
-    col1 = tfrm.getCol1().get128();
-    col2 = tfrm.getCol2().get128();
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = vec_sel( col0, col1, select_y );
-    //xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
-    //yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
-    //zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
-    xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) );
-    xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // TODO: Ck
-    yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) );
-    zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) );
-
-    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = vec_sel( col0, col1, select_z );									// zy_xz_yx = 00 01 12 03
-    //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
-	zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) );		// zy_xz_yx = 12 12 01 00
-    zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y );				// zy_xz_yx = 12 20 01 00
-    yz_zx_xy = vec_sel( col0, col1, select_x );									// yz_zx_xy = 10 01 02 03
-    //yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
-	yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) );		// yz_zx_xy = 10 02 10 10
-	yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x );				// yz_zx_xy = 21 02 10 10
-
-    sum = vec_add( zy_xz_yx, yz_zx_xy );
-    diff = vec_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) );
-
-    //res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
-	res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) );
-	res0 = vec_sel( res0, vec_splat(diff, 0), select_w );  // TODO: Ck
-    //res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
-	res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) );
-	res1 = vec_sel( res1, vec_splat(diff, 1), select_w );  // TODO: Ck
-    //res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
-	res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) );
-	res2 = vec_sel( res2, vec_splat(diff, 2), select_w );  // TODO: Ck
-    res3 = diff;
-    res0 = vec_sel( res0, radicand, select_x );
-    res1 = vec_sel( res1, radicand, select_y );
-    res2 = vec_sel( res2, radicand, select_z );
-    res3 = vec_sel( res3, radicand, select_w );
-    res0 = vec_mul( res0, vec_splat( scale, 0 ) );
-    res1 = vec_mul( res1, vec_splat( scale, 1 ) );
-    res2 = vec_mul( res2, vec_splat( scale, 2 ) );
-    res3 = vec_mul( res3, vec_splat( scale, 3 ) );
-
-    /* determine case and select answer */
-
-    xx = vec_splat( col0, 0 );
-    yy = vec_splat( col1, 1 );
-    zz = vec_splat( col2, 2 );
-    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
-    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
-    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) );
-    mVec128 = res;
-}
-
-inline const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat )
-{
-    __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    __m128 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    xxxx = vec_splat( vec.get128(), 0 );
-    mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );
-    //mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
-	mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y);
-    //mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-	mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
-	mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y);
-    yyyy = vec_splat( vec.get128(), 1 );
-    res = vec_mul( mcol0, xxxx );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mcol1, yyyy, res );
-    res = vec_madd( mcol2, zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 crossMatrix( const Vector3 &vec )
-{
-    __m128 neg, res0, res1, res2;
-    neg = negatef4( vec.get128() );
-	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
-    //res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
-	res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) );
-	res0 = vec_sel(res0, vec_splat(neg, 1), select_z);
-    //res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
-	res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x);
-    //res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
-	res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) );
-	res2 = vec_sel(res2, vec_splat(neg, 0), select_y);
-	__declspec(align(16)) unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff};
-	__declspec(align(16)) unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff};
-	__declspec(align(16)) unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff};
-    res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) );
-    res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) );
-    res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // TODO: Use selects?
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X }) // NOTE(review): in the Matrix3/Matrix4 code that follows, the PERM masks are named only in commented-out vec_perm calls -- confirm they are still needed
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f // pi / 2 as a single-precision literal
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )
+    : mCol0( mat.mCol0 ),   // copy constructor: duplicates the source column by column
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 )
+{
+}
+
+inline Matrix3::Matrix3( float scalar )
+    : mCol0( Vector3( scalar ) ),   // every column is built from the same float
+      mCol1( Vector3( scalar ) ),
+      mCol2( Vector3( scalar ) )
+{
+}
+
+inline Matrix3::Matrix3( const floatInVec &scalar )
+    : mCol0( Vector3( scalar ) ),   // every column is built from the same floatInVec
+      mCol1( Vector3( scalar ) ),
+      mCol2( Vector3( scalar ) )
+{
+}
+
+inline Matrix3::Matrix3( const Quat &unitQuat )
+{   // Expands the quaternion lanes (x,y,z,w) into three matrix columns; the parameter name indicates a unit quaternion is expected.
+    __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+	__declspec(align(16)) unsigned int sx[4] = {0xffffffff, 0, 0, 0}; // all bits set in lane x only
+	__declspec(align(16)) unsigned int sz[4] = {0, 0, 0xffffffff, 0}; // all bits set in lane z only
+	__m128 select_x = _mm_load_ps((float *)sx);
+	__m128 select_z = _mm_load_ps((float *)sz);
+
+    xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() ); // 2x, 2y, 2z, 2w
+    wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) ); // w in every lane
+	yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) ); // lanes reordered to y, z, x, w
+	zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) ); // lanes reordered to z, x, y, w
+    yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) ); // 2y, 2z, 2x, 2w
+    zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) ); // 2z, 2x, 2y, 2w
+
+    tmp0 = _mm_mul_ps( yzxw_2, wwww );									// tmp0 = 2yw, 2zw, 2xw, 2w2
+	tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) );	// tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2
+    tmp2 = _mm_mul_ps( yzxw, xyzw_2 );									// tmp2 = 2xy, 2yz, 2xz, 2w2
+    tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 );				// tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2
+    tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) );				// tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2
+    tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) );				// tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2
+
+    tmp3 = vec_sel( tmp0, tmp1, select_x ); // NOTE(review): assuming vec_sel( a, b, mask ) takes b where mask bits are set -- x from tmp1, rest from tmp0
+    tmp4 = vec_sel( tmp1, tmp2, select_x ); // same assumption: x from tmp2, rest from tmp1
+    tmp5 = vec_sel( tmp2, tmp0, select_x ); // same assumption: x from tmp0, rest from tmp2
+    mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) ); // under that assumption: (tmp1.x, tmp0.y, tmp2.z)
+    mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) ); // under that assumption: (tmp2.x, tmp1.y, tmp0.z)
+    mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) ); // under that assumption: (tmp0.x, tmp2.y, tmp1.z)
+}
+
+inline Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 )
+    : mCol0( _col0 ),   // the three arguments become columns 0, 1 and 2, in that order
+      mCol1( _col1 ),
+      mCol2( _col2 )
+{
+}
+
+inline Matrix3 & Matrix3::setCol0( const Vector3 &_col0 )
+{   // Overwrites column 0 and hands back the matrix so calls can be chained.
+    this->mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol1( const Vector3 &_col1 )
+{   // Overwrites column 1 and hands back the matrix so calls can be chained.
+    this->mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol2( const Vector3 &_col2 )
+{   // Overwrites column 2 and hands back the matrix so calls can be chained.
+    this->mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 &vec )
+{   // Columns are addressed as an array starting at mCol0; col is not range-checked.
+    (&mCol0)[col] = vec;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 &vec )
+{
+    // Element i of vec is written into slot `row` of column i, for i = 0..2.
+    for ( int col = 0; col < 3; ++col )
+        (&mCol0)[col].setElem( row, vec.getElem( col ) );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )
+{   // Writes val into slot `row` of column `col`, updating that column in place.
+    (&mCol0)[col].setElem( row, val );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val )
+{
+    // Read-modify-write: fetch the column, patch one element, store the column back.
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+
+inline const floatInVec Matrix3::getElem( int col, int row ) const
+{   // Slot `row` of column `col`.
+    return (*this)[col].getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const
+{   // Copy of column 0.
+    return this->mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const
+{   // Copy of column 1.
+    return this->mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const
+{   // Copy of column 2.
+    return this->mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const
+{   // Columns are read as an array starting at mCol0; col is not range-checked.
+    return (&mCol0)[col];
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const
+{   // Gathers slot `row` of each column into one Vector3.
+    return Vector3( getCol0().getElem( row ), getCol1().getElem( row ), getCol2().getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col )
+{   // Mutable reference to column `col`; the index is not range-checked.
+    return (&mCol0)[col];
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const
+{   // Read-only access: returns a copy of column `col`; the index is not range-checked.
+    return (&mCol0)[col];
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{   // Column-wise assignment; yields *this so assignments can be chained.
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )
+{   // Builds the three result columns from the input columns using merges, shuffles and masked selects.
+    __m128 tmp0, tmp1, res0, res1, res2;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); // presumably interleaves the low lanes of col0/col2 -- confirm against vec_mergeh
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() ); // presumably the high-lane counterpart -- confirm against vec_mergel
+    res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
+    //res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; // all bits set in lane y only
+	res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2)); // lanes (z, z, w, x) of tmp0
+	res1 = vec_sel(res1, mat.getCol1().get128(), select_y); // shuffle + select stands in for the commented-out vec_perm above
+    //res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0)); // lanes (x, y, y, x) of tmp1
+	res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y); // NOTE(review): looks like col1.z is broadcast and then placed in lane y -- confirm vec_splat/vec_sel semantics
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat )
+{   // Inverse assembled from column cross products scaled by invdet; no zero-determinant guard is visible here.
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() ); // presumably col0 x col1 -- helper is defined elsewhere
+    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() ); // presumably col1 x col2
+    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() ); // presumably col2 x col0
+    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() ); // same operands as determinant( const Matrix3 & ): (col0, col1) cross result against col2
+    dot = vec_splat( dot, 0 ); // NOTE(review): looks like a broadcast of lane 0 -- confirm against vec_splat
+    invdet = recipf4( dot ); // presumably the per-lane reciprocal -- confirm against recipf4
+    tmp3 = vec_mergeh( tmp0, tmp2 ); // from here on: the same merge/shuffle/select sequence as transpose( const Matrix3 & ), applied to tmp0, tmp1, tmp2
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; // all bits set in lane y only
+	inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2)); // lanes (z, z, w, x) of tmp3
+	inv1 = vec_sel(inv1, tmp1, select_y); // shuffle + select stands in for the commented-out vec_perm above
+    //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+	inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0)); // lanes (x, y, y, x) of tmp4
+	inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
+    inv0 = vec_mul( inv0, invdet ); // scale every column by invdet
+    inv1 = vec_mul( inv1, invdet );
+	inv2 = vec_mul( inv2, invdet );
+    return Matrix3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 )
+    );
+}
+
+inline const floatInVec determinant( const Matrix3 & mat )
+{   // dot( col2, cross( col0, col1 ) ), with the columns read through operator [].
+    return dot( mat[2], cross( mat[0], mat[1] ) );
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    // Column-wise sum of the two matrices.
+    const Vector3 sum0 = mCol0 + mat.mCol0;
+    const Vector3 sum1 = mCol1 + mat.mCol1;
+    const Vector3 sum2 = mCol2 + mat.mCol2;
+    return Matrix3( sum0, sum1, sum2 );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    // Column-wise difference: this minus mat.
+    const Vector3 diff0 = mCol0 - mat.mCol0;
+    const Vector3 diff1 = mCol1 - mat.mCol1;
+    const Vector3 diff2 = mCol2 - mat.mCol2;
+    return Matrix3( diff0, diff1, diff2 );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    // In-place add, expressed through binary operator + followed by assignment.
+    return ( *this = *this + mat );
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    // In-place subtract, expressed through binary operator - followed by assignment.
+    return ( *this = *this - mat );
+}
+
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    // Negates the matrix one column at a time.
+    const Vector3 neg0 = -mCol0;
+    const Vector3 neg1 = -mCol1;
+    const Vector3 neg2 = -mCol2;
+    return Matrix3( neg0, neg1, neg2 );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    // Applies the Vector3 absPerElem to each column in turn.
+    const Vector3 abs0 = absPerElem( mat.getCol0() );
+    const Vector3 abs1 = absPerElem( mat.getCol1() );
+    const Vector3 abs2 = absPerElem( mat.getCol2() );
+    return Matrix3( abs0, abs1, abs2 );
+}
+
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{   // Wraps the float in a floatInVec and defers to that overload.
+    return this->operator *( floatInVec( scalar ) );
+}
+
+inline const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const
+{
+    // Multiplies every column by the same scalar.
+    const Vector3 scaled0 = mCol0 * scalar;
+    const Vector3 scaled1 = mCol1 * scalar;
+    const Vector3 scaled2 = mCol2 * scalar;
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{   // Wraps the float in a floatInVec and defers to that overload.
+    return this->operator *=( floatInVec( scalar ) );
+}
+
+inline Matrix3 & Matrix3::operator *=( const floatInVec &scalar )
+{
+    // In-place scale, expressed through binary operator * followed by assignment.
+    return ( *this = *this * scalar );
+}
+
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{   // scalar * matrix: the float is wrapped in a floatInVec and applied through the matrix's own operator *.
+    return mat * floatInVec( scalar );
+}
+
+inline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat )
+{   // scalar * matrix is forwarded to the member form matrix * scalar.
+    return mat.operator *( scalar );
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 &vec ) const
+{
+    // Accumulates the three columns, each weighted by one splatted lane (0, 1, 2) of vec.
+    const __m128 v = vec.get128();
+    const __m128 splat0 = vec_splat( v, 0 );
+    const __m128 splat1 = vec_splat( v, 1 );
+    const __m128 splat2 = vec_splat( v, 2 );
+    __m128 acc = vec_mul( mCol0.get128(), splat0 );
+    acc = vec_madd( mCol1.get128(), splat1, acc );
+    acc = vec_madd( mCol2.get128(), splat2, acc );
+    return Vector3( acc );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    // Matrix product: this matrix is applied to each column of mat.
+    const Vector3 out0 = *this * mat.mCol0;
+    const Vector3 out1 = *this * mat.mCol1;
+    const Vector3 out2 = *this * mat.mCol2;
+    return Matrix3( out0, out1, out2 );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    // In-place product (this * mat), expressed through binary operator * and assignment.
+    return ( *this = *this * mat );
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    // Applies the Vector3 mulPerElem to matching column pairs.
+    const Vector3 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+inline const Matrix3 Matrix3::identity( )
+{
+    // Columns are Vector3::xAxis, yAxis and zAxis, in that order.
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Matrix3( axisX, axisY, axisZ );
+}
+
+inline const Matrix3 Matrix3::rotationX( float radians )
+{   // Wraps the float angle in a floatInVec and defers to that overload.
+    return Matrix3::rotationX( floatInVec( radians ) );
+}
+
+inline const Matrix3 Matrix3::rotationX( const floatInVec &radians )
+{
+    // Column 0 is Vector3::xAxis. With s/c taken from sincosf4 and assuming
+    // vec_sel( a, b, mask ) takes b in the masked lane, column 1 is (0, c, s)
+    // and column 2 is (0, -s, c).
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    const __m128 zeros = _mm_setzero_ps();
+    const __m128 col1 = vec_sel( vec_sel( zeros, cosV, maskY ), sinV, maskZ );
+    const __m128 col2 = vec_sel( vec_sel( zeros, negatef4( sinV ), maskY ), cosV, maskZ );
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( col1 ),
+        Vector3( col2 )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationY( float radians )
+{   // Wraps the float angle in a floatInVec and defers to that overload.
+    return Matrix3::rotationY( floatInVec( radians ) );
+}
+
+inline const Matrix3 Matrix3::rotationY( const floatInVec &radians )
+{
+    // Column 1 is Vector3::yAxis. With s/c taken from sincosf4 and assuming
+    // vec_sel( a, b, mask ) takes b in the masked lane, column 0 is (c, 0, -s)
+    // and column 2 is (s, 0, c).
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    const __m128 zeros = _mm_setzero_ps();
+    const __m128 col0 = vec_sel( vec_sel( zeros, cosV, maskX ), negatef4( sinV ), maskZ );
+    const __m128 col2 = vec_sel( vec_sel( zeros, sinV, maskX ), cosV, maskZ );
+    return Matrix3(
+        Vector3( col0 ),
+        Vector3::yAxis( ),
+        Vector3( col2 )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{   // Wraps the float angle in a floatInVec and defers to that overload.
+    return Matrix3::rotationZ( floatInVec( radians ) );
+}
+
+inline const Matrix3 Matrix3::rotationZ( const floatInVec &radians )
+{
+    // Column 2 is Vector3::zAxis. With s/c taken from sincosf4 and assuming
+    // vec_sel( a, b, mask ) takes b in the masked lane, column 0 is (c, s, 0)
+    // and column 1 is (-s, c, 0).
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __m128 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    const __m128 zeros = _mm_setzero_ps();
+    const __m128 col0 = vec_sel( vec_sel( zeros, cosV, maskX ), sinV, maskY );
+    const __m128 col1 = vec_sel( vec_sel( zeros, negatef4( sinV ), maskX ), cosV, maskY );
+    return Matrix3(
+        Vector3( col0 ),
+        Vector3( col1 ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ )
+{   // Builds a rotation from three angles packed as (x, y, z); sines and cosines for all lanes come from one sincosf4 call.
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    angles = Vector4( radiansXYZ, 0.0f ).get128(); // the three angles extended with 0.0f
+    sincosf4( angles, &s, &c ); // presumably per-lane sine into s and cosine into c -- confirm against sincosf4
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s ); // NOTE(review): looks like an interleave of the high lanes (z, w) of c and s -- confirm against vec_mergel
+    Z1 = vec_mergel( negS, c );
+	__declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0}; // lanes x, y, z set; lane w clear
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); // clears lane w of Z1
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); // (c.y, c.y, negS.y, negS.x)
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); // (s.y, s.y, c.y, c.x)
+    X0 = vec_splat( s, 0 ); // presumably s.x in every lane -- confirm against vec_splat
+    X1 = vec_splat( c, 0 ); // presumably c.x in every lane
+    tmp = vec_mul( Z0, Y1 );
+    return Matrix3(
+        Vector3( vec_mul( Z0, Y0 ) ),
+        Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ) // NOTE(review): assumes vec_nmsub( a, b, c ) is c - a*b -- confirm
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec )
+{   // Wraps the float angle in a floatInVec and defers to that overload.
+    return Matrix3::rotation( floatInVec( radians ), unitVec );
+}
+
+inline const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{   // Angle/axis rotation: each column is axis * axis[i] * (1 - c) plus a vector mixing c with signed axis*s terms; the parameter name indicates a unit axis is expected.
+    __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c ); // presumably per-lane sine into s and cosine into c -- confirm against sincosf4
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
+    axisS = vec_mul( axis, s );
+    negAxisS = negatef4( axisS );
+	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; // all bits set in lane x only
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; // all bits set in lane y only
+	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; // all bits set in lane z only
+    //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+	tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) ); // lanes (x, z, x, x) of axisS
+	tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z); // NOTE(review): assuming vec_sel takes its second operand in the masked lane, lane z becomes negAxisS.y
+    //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+	tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x ); // same assumption: (negAxisS.z, axisS.x, axisS.x, axisS.x)
+    //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+	tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) ); // lanes (y, x, x, x) of axisS
+	tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y); // same assumption: lane y becomes negAxisS.x
+    tmp0 = vec_sel( tmp0, c, select_x ); // c goes on the diagonal: lane x of tmp0
+    tmp1 = vec_sel( tmp1, c, select_y ); // lane y of tmp1
+    tmp2 = vec_sel( tmp2, c, select_z ); // lane z of tmp2
+    return Matrix3(
+        Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( const Quat &unitQuat )
+{   // Named-constructor form of the Matrix3( const Quat & ) conversion.
+    return static_cast<Matrix3>( unitQuat );
+}
+
+inline const Matrix3 Matrix3::scale( const Vector3 &scaleVec )
+{   // Each column starts as zero and receives one lane of scaleVec through a single-lane vec_sel mask.
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    const __m128 zeros = _mm_setzero_ps();
+    const __m128 factors = scaleVec.get128();
+    const Vector3 col0( vec_sel( zeros, factors, maskX ) );
+    const Vector3 col1( vec_sel( zeros, factors, maskY ) );
+    const Vector3 col2( vec_sel( zeros, factors, maskZ ) );
+    return Matrix3( col0, col1, col2 );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec )
+{
+    // Column 0 is multiplied by scaleVec's X, column 1 by its Y, column 2 by its Z.
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat )
+{
+    // Every column is combined with scaleVec through the Vector3 mulPerElem.
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    // Forwards the same flag to the Vector3 select for each column pair.
+    const Vector3 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( pick0, pick1, pick2 );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 )
+{
+    // Forwards the same boolInVec to the Vector3 select for each column pair.
+    const Vector3 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( pick0, pick1, pick2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat )
+{
+    // Prints the matrix one row at a time, row 0 first, via the Vector3 print.
+    for ( int row = 0; row < 3; ++row )
+        print( mat.getRow( row ) );
+}
+
+inline void print( const Matrix3 & mat, const char * name )
+{   // Labelled variant: writes name followed by ':' and a newline, then the matrix via print( mat ).
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat )
+    : mCol0( mat.mCol0 ),   // copy constructor: duplicates the source column by column
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 ),
+      mCol3( mat.mCol3 )
+{
+}
+
+inline Matrix4::Matrix4( float scalar )
+    : mCol0( Vector4( scalar ) ),   // every column is built from the same float
+      mCol1( Vector4( scalar ) ),
+      mCol2( Vector4( scalar ) ),
+      mCol3( Vector4( scalar ) )
+{
+}
+
+inline Matrix4::Matrix4( const floatInVec &scalar )
+    : mCol0( Vector4( scalar ) ),   // every column is built from the same floatInVec
+      mCol1( Vector4( scalar ) ),
+      mCol2( Vector4( scalar ) ),
+      mCol3( Vector4( scalar ) )
+{
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat )
+    : mCol0( Vector4( mat.getCol0(), 0.0f ) ),   // columns 0-2 are extended with 0.0f, column 3 with 1.0f
+      mCol1( Vector4( mat.getCol1(), 0.0f ) ),
+      mCol2( Vector4( mat.getCol2(), 0.0f ) ),
+      mCol3( Vector4( mat.getCol3(), 1.0f ) )
+{
+}
+
+inline Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 )
+    : mCol0( _col0 ),   // the four arguments become columns 0..3, in that order
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec )
+    : mCol0( Vector4( mat.getCol0(), 0.0f ) ),   // the 3x3 columns are extended with 0.0f, translateVec with 1.0f
+      mCol1( Vector4( mat.getCol1(), 0.0f ) ),
+      mCol2( Vector4( mat.getCol2(), 0.0f ) ),
+      mCol3( Vector4( translateVec, 1.0f ) )
+{
+}
+
+inline Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec )
+{
+    // The quaternion is first converted to a Matrix3; its columns are then
+    // extended with 0.0f and translateVec is extended with 1.0f.
+    const Matrix3 rot( unitQuat );
+    mCol0 = Vector4( rot.getCol0(), 0.0f );
+    mCol1 = Vector4( rot.getCol1(), 0.0f );
+    mCol2 = Vector4( rot.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+inline Matrix4 & Matrix4::setCol0( const Vector4 &_col0 )
+{   // Overwrites column 0 and hands back the matrix so calls can be chained.
+    this->mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol1( const Vector4 &_col1 )
+{   // Overwrites column 1 and hands back the matrix so calls can be chained.
+    this->mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol2( const Vector4 &_col2 )
+{   // Overwrites column 2 and hands back the matrix so calls can be chained.
+    this->mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol3( const Vector4 &_col3 )
+{   // Overwrites column 3 and hands back the matrix so calls can be chained.
+    this->mCol3 = _col3;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 &vec )
+{   // Columns are addressed as an array starting at mCol0; col is not range-checked.
+    (&mCol0)[col] = vec;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 &vec )
+{   // Element i of vec is written into slot `row` of column i, for i = 0..3.
+    for ( int col = 0; col < 4; ++col )
+    {
+        (&mCol0)[col].setElem( row, vec.getElem( col ) );
+    }
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{   // Writes val into slot `row` of column `col`, updating that column in place.
+    (&mCol0)[col].setElem( row, val );
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val )
+{
+    // Read-modify-write: fetch the column, patch one element, store the column back.
+    Vector4 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+
+inline const floatInVec Matrix4::getElem( int col, int row ) const
+{   // Slot `row` of column `col`.
+    return (*this)[col].getElem( row );
+}
+
+inline const Vector4 Matrix4::getCol0( ) const
+{   // Copy of column 0.
+    return this->mCol0;
+}
+
+inline const Vector4 Matrix4::getCol1( ) const
+{   // Copy of column 1.
+    return this->mCol1;
+}
+
+inline const Vector4 Matrix4::getCol2( ) const
+{   // Copy of column 2.
+    return this->mCol2;
+}
+
+inline const Vector4 Matrix4::getCol3( ) const
+{   // Copy of column 3.
+    return this->mCol3;
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const
+{   // Columns are read as an array starting at mCol0; col is not range-checked.
+    return (&mCol0)[col];
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const
+{   // Gathers slot `row` of each column into one Vector4.
+    return Vector4( getCol0().getElem( row ), getCol1().getElem( row ), getCol2().getElem( row ), getCol3().getElem( row ) );
+}
+
+inline Vector4 & Matrix4::operator []( int col )
+{   // Mutable reference to column `col`; the index is not range-checked.
+    return (&mCol0)[col];
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const
+{   // Read-only access: returns a copy of column `col`; the index is not range-checked.
+    return (&mCol0)[col];
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{   // Column-wise assignment; yields *this so assignments can be chained.
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+    return *this;
+}
+
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    // Two rounds of vec_mergeh/vec_mergel: first on column pairs (0,2) and (1,3), then on the intermediate results.
+    const __m128 c0 = mat.getCol0().get128();
+    const __m128 c1 = mat.getCol1().get128();
+    const __m128 c2 = mat.getCol2().get128();
+    const __m128 c3 = mat.getCol3().get128();
+    const __m128 hi02 = vec_mergeh( c0, c2 );
+    const __m128 hi13 = vec_mergeh( c1, c3 );
+    const __m128 lo02 = vec_mergel( c0, c2 );
+    const __m128 lo13 = vec_mergel( c1, c3 );
+    return Matrix4(
+        Vector4( vec_mergeh( hi02, hi13 ) ),
+        Vector4( vec_mergel( hi02, hi13 ) ),
+        Vector4( vec_mergeh( lo02, lo13 ) ),
+        Vector4( vec_mergel( lo02, lo13 ) )
+    );
+}
+
+// TODO: Tidy. Constants read by inverse( const Matrix4 & ) below: two masks it XORs into the minterm rows, and the source of the 1.0f it divides by the determinant.
+static __declspec(align(16)) const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000}; // top bit set in lanes 1 and 3
+static __declspec(align(16)) const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000}; // top bit set in lanes 0 and 2
+static __declspec(align(16)) const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f}; // only element 0 is read (via _mm_load_ss) in the code below
+
+inline const Matrix4 inverse( const Matrix4 & mat )
+{	// General 4x4 inverse: sign-adjusted minterms scaled by 1/determinant, then transposed; no zero-determinant guard is visible here.
+	__m128 Va,Vb,Vc;
+	__m128 r1,r2,r3,tt,tt2;
+	__m128 sum,Det,RDet;
+	__m128 trns0,trns1,trns2,trns3;
+
+	__m128 _L1 = mat.getCol0().get128();
+	__m128 _L2 = mat.getCol1().get128();
+	__m128 _L3 = mat.getCol2().get128();
+	__m128 _L4 = mat.getCol3().get128();
+	// Calculating the minterms for the first line.
+
+	// _mm_ror_ps is just a macro using _mm_shuffle_ps().
+	tt = _L4; tt2 = _mm_ror_ps(_L3,1); 
+	Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));					// V3'*V4
+	Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));					// V3'*V4"
+	Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));					// V3'*V4^
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V3"*V4^ - V3^*V4"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V3^*V4' - V3'*V4^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V3'*V4" - V3"*V4'
+
+	tt = _L2;
+	Va = _mm_ror_ps(tt,1);		sum = _mm_mul_ps(Va,r1);
+	Vb = _mm_ror_ps(tt,2);		sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
+	Vc = _mm_ror_ps(tt,3);		sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
+
+	// Calculating the determinant: multiply by the first column, then add lanes pairwise (lane 0 += lane 2, lane 1 += lane 3).
+	Det = _mm_mul_ps(sum,_L1);
+	Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
+
+	const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN);
+	const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP);
+
+	__m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN);
+
+	// Calculating the minterms of the second line (using previous results).
+	tt = _mm_ror_ps(_L1,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP);
+
+	// Finishing the determinant: lane 0 = lane 0 - lane 1 of the pairwise sums (no test is actually performed here).
+	Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
+
+	// Calculating the minterms of the third line.
+	tt = _mm_ror_ps(_L1,1);
+	Va = _mm_mul_ps(tt,Vb);									// V1'*V2"
+	Vb = _mm_mul_ps(tt,Vc);									// V1'*V2^
+	Vc = _mm_mul_ps(tt,_L2);								// V1'*V2
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V1"*V2^ - V1^*V2"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V1^*V2' - V1'*V2^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V1'*V2" - V1"*V2'
+
+	tt = _mm_ror_ps(_L4,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN);
+
+	// Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs).
+	RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f?
+	RDet = _mm_shuffle_ps(RDet,RDet,0x00);	// broadcast lane 0 (1/determinant) to all four lanes
+
+	// Divide the first 12 minterms by the determinant.
+	mtL1 = _mm_mul_ps(mtL1, RDet);
+	mtL2 = _mm_mul_ps(mtL2, RDet);
+	mtL3 = _mm_mul_ps(mtL3, RDet);
+
+	// Calculate the minterms of the fourth line and divide by the determinant.
+	tt = _mm_ror_ps(_L3,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP);
+	mtL4 = _mm_mul_ps(mtL4, RDet);
+
+	// Now we just have to transpose the minterms matrix.
+	trns0 = _mm_unpacklo_ps(mtL1,mtL2);
+	trns1 = _mm_unpacklo_ps(mtL3,mtL4);
+	trns2 = _mm_unpackhi_ps(mtL1,mtL2);
+	trns3 = _mm_unpackhi_ps(mtL3,mtL4);
+	_L1 = _mm_movelh_ps(trns0,trns1);
+	_L2 = _mm_movehl_ps(trns1,trns0);
+	_L3 = _mm_movelh_ps(trns2,trns3);
+	_L4 = _mm_movehl_ps(trns3,trns2);
+
+    return Matrix4(
+        Vector4( _L1 ),
+        Vector4( _L2 ),
+        Vector4( _L3 ),
+        Vector4( _L4 )
+    );
+}
+
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{   // Copies the XYZ part of every column into a Transform3, inverts that, and converts the result back to Matrix4.
+    Transform3 xform;
+    xform.setCol0( mat[0].getXYZ( ) );
+    xform.setCol1( mat[1].getXYZ( ) );
+    xform.setCol2( mat[2].getXYZ( ) );
+    xform.setCol3( mat[3].getXYZ( ) );
+    return Matrix4( inverse( xform ) );
+}
+
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{   // Copies the XYZ part of every column into a Transform3, applies the Transform3 orthoInverse, and converts back to Matrix4.
+    Transform3 xform;
+    xform.setCol0( mat[0].getXYZ( ) );
+    xform.setCol1( mat[1].getXYZ( ) );
+    xform.setCol2( mat[2].getXYZ( ) );
+    xform.setCol3( mat[3].getXYZ( ) );
+    return Matrix4( orthoInverse( xform ) );
+}
+
+inline const floatInVec determinant( const Matrix4 & mat )
+{
+	// Determinant of a 4x4 matrix. The minterms of the first line are formed
+	// from columns 1..3, multiplied lane-wise by column 0 and then reduced
+	// with alternating signs. Only that first line contributes to the result,
+	// so no further minterm lines are computed here.
+	__m128 Va,Vb,Vc;
+	__m128 r1,r2,r3,tt,tt2;
+	__m128 sum,Det;
+
+	__m128 _L1 = mat.getCol0().get128();
+	__m128 _L2 = mat.getCol1().get128();
+	__m128 _L3 = mat.getCol2().get128();
+	__m128 _L4 = mat.getCol3().get128();
+	// Calculating the minterms for the first line.
+
+	// _mm_ror_ps is just a macro using _mm_shuffle_ps().
+	tt = _L4; tt2 = _mm_ror_ps(_L3,1);
+	Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));					// V3'*V4
+	Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));					// V3'*V4"
+	Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));					// V3'*V4^
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V3"*V4^ - V3^*V4"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V3^*V4' - V3'*V4^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V3'*V4" - V3"*V4'
+
+	tt = _L2;
+	Va = _mm_ror_ps(tt,1);		sum = _mm_mul_ps(Va,r1);
+	Vb = _mm_ror_ps(tt,2);		sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
+	Vc = _mm_ror_ps(tt,3);		sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
+
+	// Multiply by the first column, then add lanes pairwise: lane 0 += lane 2, lane 1 += lane 3.
+	Det = _mm_mul_ps(sum,_L1);
+	Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
+
+	// Lane 0 minus lane 1 completes the alternating-sign sum. The result is
+	// handed to floatInVec( Det, 0 ), presumably selecting lane 0 -- confirm.
+	Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
+	return floatInVec(Det, 0);
+}
+
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    // Column-wise sum of the two matrices.
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    // Column-wise difference: this minus mat.
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    // In-place add, expressed through binary operator + followed by assignment.
+    return ( *this = *this + mat );
+}
+
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    // In-place subtract, expressed through binary operator - followed by assignment.
+    return ( *this = *this - mat );
+}
+
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    // Negates the matrix one column at a time.
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    // Applies the Vector4 absPerElem to each column in turn.
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{   // Wraps the float in a floatInVec and defers to that overload.
+    return this->operator *( floatInVec( scalar ) );
+}
+
+inline const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const
+{
+    // Multiplies every column by the same scalar.
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{   // Wraps the float in a floatInVec and defers to that overload.
+    return this->operator *=( floatInVec( scalar ) );
+}
+
+// In-place scalar multiply; returns a reference to this matrix.
+inline Matrix4 & Matrix4::operator *=( const floatInVec &scalar )
+{
+    const Matrix4 scaled = *this * scalar;
+    *this = scaled;
+    return *this;
+}
+
+// scalar * matrix (float on the left): forwards to the floatInVec overload.
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    const floatInVec factor( scalar );
+    return factor * mat;
+}
+
+// scalar * matrix: evaluated as mat * scalar.
+inline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat )
+{
+    const Matrix4 scaled = mat * scalar;
+    return scaled;
+}
+
+// Matrix * Vector4: sums the four columns, each weighted by the matching
+// component of vec, as ( col0*x + col1*y ) + ( col2*z + col3*w ).
+inline const Vector4 Matrix4::operator *( const Vector4 &vec ) const
+{
+    const __m128 v = vec.get128();
+    // Broadcast each component of vec across all four lanes.
+    const __m128 xxxx = _mm_shuffle_ps( v, v, _MM_SHUFFLE(0,0,0,0) );
+    const __m128 yyyy = _mm_shuffle_ps( v, v, _MM_SHUFFLE(1,1,1,1) );
+    const __m128 zzzz = _mm_shuffle_ps( v, v, _MM_SHUFFLE(2,2,2,2) );
+    const __m128 wwww = _mm_shuffle_ps( v, v, _MM_SHUFFLE(3,3,3,3) );
+    const __m128 sum01 = _mm_add_ps( _mm_mul_ps( mCol0.get128(), xxxx ), _mm_mul_ps( mCol1.get128(), yyyy ) );
+    const __m128 sum23 = _mm_add_ps( _mm_mul_ps( mCol2.get128(), zzzz ), _mm_mul_ps( mCol3.get128(), wwww ) );
+    return Vector4( _mm_add_ps( sum01, sum23 ) );
+}
+
+// Matrix * Vector3: only the first three columns contribute, computed as
+// ( col0*x + col1*y ) + col2*z; column 3 is not added.
+inline const Vector4 Matrix4::operator *( const Vector3 &vec ) const
+{
+    const __m128 v = vec.get128();
+    const __m128 xxxx = _mm_shuffle_ps( v, v, _MM_SHUFFLE(0,0,0,0) );
+    const __m128 yyyy = _mm_shuffle_ps( v, v, _MM_SHUFFLE(1,1,1,1) );
+    const __m128 zzzz = _mm_shuffle_ps( v, v, _MM_SHUFFLE(2,2,2,2) );
+    const __m128 sum01 = _mm_add_ps( _mm_mul_ps( mCol0.get128(), xxxx ), _mm_mul_ps( mCol1.get128(), yyyy ) );
+    const __m128 term2 = _mm_mul_ps( mCol2.get128(), zzzz );
+    return Vector4( _mm_add_ps( sum01, term2 ) );
+}
+
+// Matrix * Point3: like the Vector3 case but column 3 is added unscaled,
+// computed as ( col0*x + col1*y ) + ( col2*z + col3 ).
+inline const Vector4 Matrix4::operator *( const Point3 &pnt ) const
+{
+    const __m128 p = pnt.get128();
+    const __m128 xxxx = _mm_shuffle_ps( p, p, _MM_SHUFFLE(0,0,0,0) );
+    const __m128 yyyy = _mm_shuffle_ps( p, p, _MM_SHUFFLE(1,1,1,1) );
+    const __m128 zzzz = _mm_shuffle_ps( p, p, _MM_SHUFFLE(2,2,2,2) );
+    const __m128 sum01 = _mm_add_ps( _mm_mul_ps( mCol0.get128(), xxxx ), _mm_mul_ps( mCol1.get128(), yyyy ) );
+    const __m128 sum23 = _mm_add_ps( _mm_mul_ps( mCol2.get128(), zzzz ), mCol3.get128() );
+    return Vector4( _mm_add_ps( sum01, sum23 ) );
+}
+
+// Matrix * Matrix: each result column is this matrix applied to the matching column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 out0 = *this * mat.mCol0;
+    const Vector4 out1 = *this * mat.mCol1;
+    const Vector4 out2 = *this * mat.mCol2;
+    const Vector4 out3 = *this * mat.mCol3;
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// In-place post-multiplication by mat; returns a reference to this matrix.
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    const Matrix4 product = *this * mat;
+    *this = product;
+    return *this;
+}
+
+// Matrix4 * Transform3: the first three columns of tfrm go through the Vector3
+// multiply; the fourth is wrapped in a Point3 so it goes through the Point3 multiply.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 out0 = *this * tfrm.getCol0();
+    const Vector4 out1 = *this * tfrm.getCol1();
+    const Vector4 out2 = *this * tfrm.getCol2();
+    const Vector4 out3 = *this * Point3( tfrm.getCol3() );
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// In-place post-multiplication by a Transform3; returns a reference to this matrix.
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    const Matrix4 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+// Element-wise product: applies the Vector4 mulPerElem to each pair of matching columns.
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Returns the matrix whose columns are the Vector4 x, y, z and w axis vectors.
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 axisX = Vector4::xAxis( );
+    const Vector4 axisY = Vector4::yAxis( );
+    const Vector4 axisZ = Vector4::zAxis( );
+    const Vector4 axisW = Vector4::wAxis( );
+    return Matrix4( axisX, axisY, axisZ, axisW );
+}
+
+// Overwrites the x, y, z components of columns 0-2 with the columns of mat3.
+// Column 3 is not touched here. Returns a reference to this matrix.
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    this->mCol0.setXYZ( mat3.getCol0() );
+    this->mCol1.setXYZ( mat3.getCol1() );
+    this->mCol2.setXYZ( mat3.getCol2() );
+    return *this;
+}
+
+// Builds a Matrix3 from the x, y, z parts of columns 0-2.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+// Stores translateVec in the x, y, z components of column 3; returns this matrix.
+inline Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec )
+{
+    this->mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+// Returns the x, y, z components of column 3.
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 offset = mCol3.getXYZ( );
+    return offset;
+}
+
+// Float convenience overload: forwards to the floatInVec version.
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationX( angle );
+}
+
+// Rotation about the x axis. Columns 0 and 3 are the x and w axes; columns 1
+// and 2 are assembled from the sine/cosine returned by sincosf4.
+inline const Matrix4 Matrix4::rotationX( const floatInVec &radians )
+{
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinA, cosA;
+    const __m128 zeros = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinA, &cosA );
+    // Assuming vec_sel takes its second operand where the mask is set:
+    // col1 = ( 0, c, s, 0 ), col2 = ( 0, -s, c, 0 ).
+    const __m128 col1 = vec_sel( vec_sel( zeros, cosA, maskY ), sinA, maskZ );
+    const __m128 col2 = vec_sel( vec_sel( zeros, negatef4(sinA), maskY ), cosA, maskZ );
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( col1 ),
+        Vector4( col2 ),
+        Vector4::wAxis( )
+    );
+}
+
+// Float convenience overload: forwards to the floatInVec version.
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationY( angle );
+}
+
+// Rotation about the y axis. Columns 1 and 3 are the y and w axes; columns 0
+// and 2 are assembled from the sine/cosine returned by sincosf4.
+inline const Matrix4 Matrix4::rotationY( const floatInVec &radians )
+{
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinA, cosA;
+    const __m128 zeros = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinA, &cosA );
+    // Assuming vec_sel takes its second operand where the mask is set:
+    // col0 = ( c, 0, -s, 0 ), col2 = ( s, 0, c, 0 ).
+    const __m128 col0 = vec_sel( vec_sel( zeros, cosA, maskX ), negatef4(sinA), maskZ );
+    const __m128 col2 = vec_sel( vec_sel( zeros, sinA, maskX ), cosA, maskZ );
+    return Matrix4(
+        Vector4( col0 ),
+        Vector4::yAxis( ),
+        Vector4( col2 ),
+        Vector4::wAxis( )
+    );
+}
+
+// Float convenience overload: forwards to the floatInVec version.
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationZ( angle );
+}
+
+// Rotation about the z axis. Columns 2 and 3 are the z and w axes; columns 0
+// and 1 are assembled from the sine/cosine returned by sincosf4.
+inline const Matrix4 Matrix4::rotationZ( const floatInVec &radians )
+{
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __m128 sinA, cosA;
+    const __m128 zeros = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinA, &cosA );
+    // Assuming vec_sel takes its second operand where the mask is set:
+    // col0 = ( c, s, 0, 0 ), col1 = ( -s, c, 0, 0 ).
+    const __m128 col0 = vec_sel( vec_sel( zeros, cosA, maskX ), sinA, maskY );
+    const __m128 col1 = vec_sel( vec_sel( zeros, negatef4(sinA), maskX ), cosA, maskY );
+    return Matrix4(
+        Vector4( col0 ),
+        Vector4( col1 ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a rotation matrix from three angles packed in radiansXYZ (x, y, z
+// lanes). All three sines and cosines are computed with a single sincosf4
+// call, then rearranged so that each output column is a couple of lane-wise
+// products. Column 3 is the w axis.
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ )
+{
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    // The w lane is set to 0.0f before taking sin/cos.
+    angles = Vector4( radiansXYZ, 0.0f ).get128();
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    // Z0/Z1 interleave the z and w lanes of the cos/sin vectors
+    // (presumably vec_mergel merges the upper two lanes -- confirm against its definition).
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+	__declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    // Clear the w lane of Z1.
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
+    // Y0 = ( c.y, c.y, -s.y, -s.x ), Y1 = ( s.y, s.y, c.y, c.x ).
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
+    // Broadcast sin(x) and cos(x).
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_mul( Z0, Y1 );
+    return Matrix4(
+        Vector4( vec_mul( Z0, Y0 ) ),
+        Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
+        Vector4::wAxis( )
+    );
+}
+
+// Float convenience overload of the axis-angle rotation: forwards to the floatInVec version.
+inline const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec )
+{
+    const floatInVec angle( radians );
+    return rotation( angle, unitVec );
+}
+
+// Axis-angle rotation about unitVec (the parameter name suggests it is expected
+// to be unit length; nothing here normalizes it). Each of the first three
+// columns is computed as axis * axis[i] * (1 - cos) + tmp_i, where tmp_i holds
+// cos on the diagonal lane and +/- axis*sin in the other lanes. Column 3 is
+// the w axis.
+inline const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{
+    __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c );
+    // Broadcast each axis component across all lanes.
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
+    axisS = vec_mul( axis, s );
+    negAxisS = negatef4( axisS );
+	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    // The commented-out vec_perm lines below are kept as a record of the
+    // permutes that the shuffle + select pairs stand in for.
+    //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+	tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
+	tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
+    //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+	tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
+    //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+	tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
+	tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
+    // Put cos on the diagonal lane of each column.
+    tmp0 = vec_sel( tmp0, c, select_x );
+    tmp1 = vec_sel( tmp1, c, select_y );
+    tmp2 = vec_sel( tmp2, c, select_z );
+	__declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    // Zero the w lane of every operand so the w lane of each column ends up 0.
+    axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) );
+    tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) );
+    tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) );
+    tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) );
+    return Matrix4(
+        Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
+        Vector4::wAxis( )
+    );
+}
+
+// Rotation from a quaternion: builds the Transform3 rotation and converts it to a Matrix4.
+inline const Matrix4 Matrix4::rotation( const Quat &unitQuat )
+{
+    const Transform3 rot = Transform3::rotation( unitQuat );
+    return Matrix4( rot );
+}
+
+// Scale matrix: column i keeps only lane i of scaleVec (other lanes zero), for
+// i = x, y, z; column 3 is the w axis.
+inline const Matrix4 Matrix4::scale( const Vector3 &scaleVec )
+{
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    const __m128 zeros = _mm_setzero_ps();
+    const __m128 col0 = vec_sel( zeros, scaleVec.get128(), maskX );
+    const __m128 col1 = vec_sel( zeros, scaleVec.get128(), maskY );
+    const __m128 col2 = vec_sel( zeros, scaleVec.get128(), maskZ );
+    return Matrix4( Vector4( col0 ), Vector4( col1 ), Vector4( col2 ), Vector4::wAxis( ) );
+}
+
+// Scales columns 0, 1, 2 of mat by the x, y, z components of scaleVec
+// respectively; column 3 is passed through unchanged.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec )
+{
+    const Vector4 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix4( scaled0, scaled1, scaled2, mat.getCol3() );
+}
+
+// Multiplies every column of mat element-wise by ( scaleVec, 1 ), so the w
+// component of each column is left unscaled.
+inline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat )
+{
+    const Vector4 factors = Vector4( scaleVec, 1.0f );
+    const Vector4 scaled0 = mulPerElem( mat.getCol0(), factors );
+    const Vector4 scaled1 = mulPerElem( mat.getCol1(), factors );
+    const Vector4 scaled2 = mulPerElem( mat.getCol2(), factors );
+    const Vector4 scaled3 = mulPerElem( mat.getCol3(), factors );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Translation matrix: x, y, z axis columns followed by ( translateVec, 1 ).
+inline const Matrix4 Matrix4::translation( const Vector3 &translateVec )
+{
+    const Vector4 lastCol = Vector4( translateVec, 1.0f );
+    return Matrix4( Vector4::xAxis( ), Vector4::yAxis( ), Vector4::zAxis( ), lastCol );
+}
+
+// Builds a view matrix: constructs the eye's frame (basis vectors plus eye
+// position) and returns its orthoInverse.
+inline const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec )
+{
+    const Vector3 upHint = normalize( upVec );
+    // The frame's z axis points from the target back toward the eye.
+    const Vector3 back = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 side = normalize( cross( upHint, back ) );
+    // Recompute the up axis from the other two axes.
+    const Vector3 trueUp = cross( back, side );
+    const Matrix4 eyeFrame = Matrix4( Vector4( side ), Vector4( trueUp ), Vector4( back ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+// Perspective projection from a vertical field of view (radians), aspect
+// ratio and near/far distances. Columns are:
+//   ( f/aspect, 0, 0, 0 ), ( 0, f, 0, 0 ),
+//   ( 0, 0, (zNear+zFar)/(zNear-zFar), -1 ), ( 0, 0, 2*zNear*zFar/(zNear-zFar), 0 )
+// with f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians/2 ).
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    const float f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    const float rangeInv = 1.0f / ( zNear - zFar );
+    const __m128 col0 = _mm_setr_ps( f / aspect, 0.0f, 0.0f, 0.0f );
+    const __m128 col1 = _mm_setr_ps( 0.0f, f, 0.0f, 0.0f );
+    const __m128 col2 = _mm_setr_ps( 0.0f, 0.0f, ( zNear + zFar ) * rangeInv, -1.0f );
+    const __m128 col3 = _mm_setr_ps( 0.0f, 0.0f, zNear * zFar * rangeInv * 2.0f, 0.0f );
+    return Matrix4(
+        Vector4( col0 ),
+        Vector4( col1 ),
+        Vector4( col2 ),
+        Vector4( col3 )
+    );
+}
+
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    //
+    // Perspective projection for the view volume given by left/right,
+    // bottom/top and zNear/zFar. Resulting columns:
+    //   ( 2n/(r-l), 0, 0, 0 )
+    //   ( 0, 2n/(t-b), 0, 0 )
+    //   ( (r+l)/(r-l), (t+b)/(t-b), (n+f)/(n-f), -1 )
+    //   ( 0, 0, 2nf/(n-f), 0 )
+    //
+    // The six scalars are packed with _mm_setr_ps so that every lane is
+    // initialised. The w lanes (0 and 1) are padding: they give diff.w == 1,
+    // keeping the reciprocal finite, and never reach the result because the
+    // selects below mask them out.
+    __m128 lbf, rtn;
+    __m128 diff, sum, inv_diff;
+    __m128 diagonal, column, near2, far4;
+    __m128 zero = _mm_setzero_ps();
+    lbf = _mm_setr_ps( left, bottom, zFar, 0.0f );
+    rtn = _mm_setr_ps( right, top, zNear, 1.0f );
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    near2 = _mm_set1_ps( zNear );
+    near2 = vec_add( near2, near2 );
+    far4 = _mm_set1_ps( zFar );
+    diagonal = vec_mul( near2, inv_diff );
+    column = vec_mul( sum, inv_diff );
+    __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    __declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, select_x ) ),
+        Vector4( vec_sel( zero, diagonal, select_y ) ),
+        Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ),
+        Vector4( vec_sel( zero, vec_mul( diagonal, far4 ), select_z ) )
+    );
+}
+
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    //
+    // Orthographic projection for the box given by left/right, bottom/top
+    // and zNear/zFar. Resulting columns:
+    //   ( 2/(r-l), 0, 0, 0 )
+    //   ( 0, 2/(t-b), 0, 0 )
+    //   ( 0, 0, 2/(n-f), 0 )
+    //   ( -(r+l)/(r-l), -(t+b)/(t-b), (n+f)/(n-f), 1 )
+    //
+    // The six scalars are packed with _mm_setr_ps so that every lane is
+    // initialised. The w lanes (0 and 1) are padding: they give diff.w == 1,
+    // keeping the reciprocal finite, and never reach the result because the
+    // selects below mask them out.
+    __m128 lbf, rtn;
+    __m128 diff, sum, inv_diff, neg_inv_diff;
+    __m128 diagonal, column;
+    __m128 zero = _mm_setzero_ps();
+    lbf = _mm_setr_ps( left, bottom, zFar, 0.0f );
+    rtn = _mm_setr_ps( right, top, zNear, 1.0f );
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    diagonal = vec_add( inv_diff, inv_diff );
+    __declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    __declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    __declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+    // x and y use the negated reciprocal, z the positive one.
+    column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, select_x ) ),
+        Vector4( vec_sel( zero, diagonal, select_y ) ),
+        Vector4( vec_sel( zero, diagonal, select_z ) ),
+        Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) )
+    );
+}
+
+// Column-wise select between mat0 and mat1, delegating to the Vector4 select
+// with the same bool flag.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 pick3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( pick0, pick1, pick2, pick3 );
+}
+
+// Column-wise select between mat0 and mat1, delegating to the Vector4 select
+// with the same boolInVec flag.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 )
+{
+    const Vector4 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 pick3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( pick0, pick1, pick2, pick3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the matrix one row at a time (rows 0 through 3).
+inline void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Debug helper: prints "<name>:" on its own line, then the matrix rows.
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: copies all four columns from tfrm.
+inline Transform3::Transform3( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+}
+
+// Sets every column to Vector3( scalar ).
+inline Transform3::Transform3( float scalar )
+{
+    const Vector3 fill = Vector3( scalar );
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+    mCol3 = fill;
+}
+
+// Sets every column to Vector3( scalar ).
+inline Transform3::Transform3( const floatInVec &scalar )
+{
+    const Vector3 fill = Vector3( scalar );
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+    mCol3 = fill;
+}
+
+// Builds the transform from its four columns.
+inline Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 )
+{
+    this->mCol0 = _col0;
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+    this->mCol3 = _col3;
+}
+
+// Builds the transform from a 3x3 part (columns 0-2) and a translation (column 3).
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec )
+{
+    setUpper3x3( tfrm );
+    setTranslation( translateVec );
+}
+
+// Builds the transform from a quaternion (converted to a Matrix3 for
+// columns 0-2) and a translation (column 3).
+inline Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec )
+{
+    const Matrix3 rot = Matrix3( unitQuat );
+    setUpper3x3( rot );
+    setTranslation( translateVec );
+}
+
+// Replaces column 0; returns a reference to this transform.
+inline Transform3 & Transform3::setCol0( const Vector3 &_col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1; returns a reference to this transform.
+inline Transform3 & Transform3::setCol1( const Vector3 &_col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replaces column 2; returns a reference to this transform.
+inline Transform3 & Transform3::setCol2( const Vector3 &_col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replaces column 3; returns a reference to this transform.
+inline Transform3 & Transform3::setCol3( const Vector3 &_col3 )
+{
+    this->mCol3 = _col3;
+    return *this;
+}
+
+// Replaces the column at index col. The column is reached by offsetting from
+// &mCol0; col is not range-checked.
+inline Transform3 & Transform3::setCol( int col, const Vector3 &vec )
+{
+    Vector3 * const columns = &mCol0;
+    columns[col] = vec;
+    return *this;
+}
+
+// Writes element i of vec into row `row` of column i, for i = 0..3.
+inline Transform3 & Transform3::setRow( int row, const Vector4 &vec )
+{
+    this->mCol0.setElem( row, vec.getElem( 0 ) );
+    this->mCol1.setElem( row, vec.getElem( 1 ) );
+    this->mCol2.setElem( row, vec.getElem( 2 ) );
+    this->mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+// Sets a single element (float), writing through the column reference
+// returned by operator[].
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    Vector3 & column = (*this)[col];
+    column.setElem(row, val);
+    return *this;
+}
+
+// Sets a single element (floatInVec): copies the column out, edits the copy,
+// then stores it back with setCol.
+inline Transform3 & Transform3::setElem( int col, int row, const floatInVec &val )
+{
+    Vector3 column = this->getCol( col );
+    column.setElem( row, val );
+    this->setCol( col, column );
+    return *this;
+}
+
+// Returns the element at ( col, row ).
+inline const floatInVec Transform3::getElem( int col, int row ) const
+{
+    const Vector3 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of column 3.
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Returns a copy of the column at index col, reached by offsetting from
+// &mCol0; col is not range-checked.
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    const Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Gathers element `row` from each of the four columns into a Vector4.
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    const floatInVec e0 = mCol0.getElem( row );
+    const floatInVec e1 = mCol1.getElem( row );
+    const floatInVec e2 = mCol2.getElem( row );
+    const floatInVec e3 = mCol3.getElem( row );
+    return Vector4( e0, e1, e2, e3 );
+}
+
+// Mutable column access: returns a reference to the column at index col,
+// reached by offsetting from &mCol0 (no range check).
+inline Vector3 & Transform3::operator []( int col )
+{
+    Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Read-only column access: returns a copy of the column at index col,
+// reached by offsetting from &mCol0 (no range check).
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    const Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Assignment: copies all four columns from tfrm; returns a reference to this transform.
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+    return *this;
+}
+
+// General inverse of a Transform3. Cross products of the column pairs are
+// transposed into inv0..inv2, the negated translation is pushed through them
+// to give inv3, and everything is scaled by the reciprocal of
+// dot( cross(col0, col1), col2 ). No check is made for that dot product being zero.
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    __m128 inv0, inv1, inv2, inv3;
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    __m128 xxxx, yyyy, zzzz;
+    // Pairwise cross products of the three basis columns.
+    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
+    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
+    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
+    inv3 = negatef4( tfrm.getCol3().get128() );
+    // Broadcast the dot product and take its reciprocal.
+    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );
+    // Transpose ( tmp0, tmp1, tmp2 ) into inv0..inv2 with merges, shuffles and selects.
+    tmp3 = vec_mergeh( tmp0, tmp2 );
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    xxxx = vec_splat( inv3, 0 );
+    // The commented-out vec_perm lines record the permutes that the
+    // shuffle + select pairs below stand in for.
+    //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
+	inv1 = vec_sel(inv1, tmp1, select_y);
+    //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+	inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
+	inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    // inv3 = inv0 * (-t.x) + inv1 * (-t.y) + inv2 * (-t.z), t being the original column 3.
+    inv3 = vec_mul( inv0, xxxx );
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    // Scale all four columns by the reciprocal computed above.
+    inv0 = vec_mul( inv0, invdet );
+    inv1 = vec_mul( inv1, invdet );
+    inv2 = vec_mul( inv2, invdet );
+    inv3 = vec_mul( inv3, invdet );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+// Inverse for a transform whose 3x3 part is assumed orthonormal (per the
+// function name; not checked here). The 3x3 part is transposed into
+// inv0..inv2, and the negated translation is pushed through the transposed
+// columns to give inv3. No determinant or reciprocal is computed.
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    __m128 inv0, inv1, inv2, inv3;
+    __m128 tmp0, tmp1;
+    __m128 xxxx, yyyy, zzzz;
+    // Interleave columns 0 and 2 as the first step of the transpose.
+    tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
+    tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
+    inv3 = negatef4( tfrm.getCol3().get128() );
+    inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
+    xxxx = vec_splat( inv3, 0 );
+    // The commented-out vec_perm lines record the permutes that the
+    // shuffle + select pairs below stand in for.
+    //inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
+	inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y);
+    //inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
+	inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y);
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    // inv3 = inv0 * (-t.x) + inv1 * (-t.y) + inv2 * (-t.z), t being the original column 3.
+    inv3 = vec_mul( inv0, xxxx );
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+// Applies the Vector3 absPerElem to each column of tfrm.
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+// Transform * Vector3: accumulates col0*x, then col1*y, then col2*z.
+// Column 3 (the translation) is not applied.
+inline const Vector3 Transform3::operator *( const Vector3 &vec ) const
+{
+    const __m128 xxxx = vec_splat( vec.get128(), 0 );
+    const __m128 yyyy = vec_splat( vec.get128(), 1 );
+    const __m128 zzzz = vec_splat( vec.get128(), 2 );
+    __m128 acc = vec_mul( mCol0.get128(), xxxx );
+    acc = vec_madd( mCol1.get128(), yyyy, acc );
+    acc = vec_madd( mCol2.get128(), zzzz, acc );
+    return Vector3( acc );
+}
+
+// Transform * Point3: the x/z terms and the y/translation terms are summed
+// in two independent partial sums and then added together.
+inline const Point3 Transform3::operator *( const Point3 &pnt ) const
+{
+    const __m128 xxxx = vec_splat( pnt.get128(), 0 );
+    const __m128 yyyy = vec_splat( pnt.get128(), 1 );
+    const __m128 zzzz = vec_splat( pnt.get128(), 2 );
+    __m128 partXZ = vec_mul( mCol0.get128(), xxxx );
+    __m128 partYT = vec_mul( mCol1.get128(), yyyy );
+    partXZ = vec_madd( mCol2.get128(), zzzz, partXZ );
+    partYT = vec_add( mCol3.get128(), partYT );
+    return Point3( vec_add( partXZ, partYT ) );
+}
+
+// Transform * Transform: columns 0-2 of tfrm go through the Vector3 multiply;
+// column 3 is wrapped in a Point3 so it goes through the Point3 multiply.
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Vector3 out0 = *this * tfrm.mCol0;
+    const Vector3 out1 = *this * tfrm.mCol1;
+    const Vector3 out2 = *this * tfrm.mCol2;
+    const Vector3 out3 = Vector3( ( *this * Point3( tfrm.mCol3 ) ) );
+    return Transform3( out0, out1, out2, out3 );
+}
+
+// In-place post-multiplication by tfrm; returns a reference to this transform.
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    const Transform3 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+// Element-wise product: applies the Vector3 mulPerElem to each pair of matching columns.
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+// Identity transform: x, y, z axis columns and a zero fourth column.
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 noOffset = Vector3( 0.0f );
+    return Transform3( Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ), noOffset );
+}
+
+// Replaces columns 0-2 with the columns of tfrm; column 3 is left as is.
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    this->mCol0 = tfrm.getCol0();
+    this->mCol1 = tfrm.getCol1();
+    this->mCol2 = tfrm.getCol2();
+    return *this;
+}
+
+// Returns columns 0-2 as a Matrix3.
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    const Matrix3 upper = Matrix3( mCol0, mCol1, mCol2 );
+    return upper;
+}
+
+// Stores translateVec as column 3; returns a reference to this transform.
+inline Transform3 & Transform3::setTranslation( const Vector3 &translateVec )
+{
+    this->mCol3 = translateVec;
+    return *this;
+}
+
+// Returns a copy of column 3.
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->mCol3;
+}
+
+// Float convenience overload: forwards to the floatInVec version.
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationX( angle );
+}
+
+// Rotation about the x axis with a zero fourth column. Column 0 is the x
+// axis; columns 1 and 2 are assembled from the sine/cosine returned by sincosf4.
+inline const Transform3 Transform3::rotationX( const floatInVec &radians )
+{
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinA, cosA;
+    const __m128 zeros = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinA, &cosA );
+    // Assuming vec_sel takes its second operand where the mask is set:
+    // col1 = ( 0, c, s, 0 ), col2 = ( 0, -s, c, 0 ).
+    const __m128 col1 = vec_sel( vec_sel( zeros, cosA, maskY ), sinA, maskZ );
+    const __m128 col2 = vec_sel( vec_sel( zeros, negatef4(sinA), maskY ), cosA, maskZ );
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3( col1 ),
+        Vector3( col2 ),
+        Vector3( _mm_setzero_ps() )
+    );
+}
+
+// Float convenience overload: forwards to the floatInVec version.
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationY( angle );
+}
+
+// Rotation about the y axis with a zero fourth column. Column 1 is the y
+// axis; columns 0 and 2 are assembled from the sine/cosine returned by sincosf4.
+inline const Transform3 Transform3::rotationY( const floatInVec &radians )
+{
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinA, cosA;
+    const __m128 zeros = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinA, &cosA );
+    // Assuming vec_sel takes its second operand where the mask is set:
+    // col0 = ( c, 0, -s, 0 ), col2 = ( s, 0, c, 0 ).
+    const __m128 col0 = vec_sel( vec_sel( zeros, cosA, maskX ), negatef4(sinA), maskZ );
+    const __m128 col2 = vec_sel( vec_sel( zeros, sinA, maskX ), cosA, maskZ );
+    return Transform3(
+        Vector3( col0 ),
+        Vector3::yAxis( ),
+        Vector3( col2 ),
+        Vector3( 0.0f )
+    );
+}
+
+// Float convenience overload: forwards to the floatInVec version.
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationZ( angle );
+}
+
+// Rotation about the z axis with a zero fourth column. Column 2 is the z
+// axis; columns 0 and 1 are assembled from the sine/cosine returned by sincosf4.
+inline const Transform3 Transform3::rotationZ( const floatInVec &radians )
+{
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __m128 sinA, cosA;
+    const __m128 zeros = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinA, &cosA );
+    // Assuming vec_sel takes its second operand where the mask is set:
+    // col0 = ( c, s, 0, 0 ), col1 = ( -s, c, 0, 0 ).
+    const __m128 col0 = vec_sel( vec_sel( zeros, cosA, maskX ), sinA, maskY );
+    const __m128 col1 = vec_sel( vec_sel( zeros, negatef4(sinA), maskX ), cosA, maskY );
+    return Transform3(
+        Vector3( col0 ),
+        Vector3( col1 ),
+        Vector3::zAxis( ),
+        Vector3( 0.0f )
+    );
+}
+
+// Builds a rotation transform from three angles packed in radiansXYZ (x, y, z
+// lanes). All three sines and cosines are computed with a single sincosf4
+// call, then rearranged so that each output column is a couple of lane-wise
+// products. The fourth column is zero.
+inline const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ )
+{
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    // The w lane is set to 0.0f before taking sin/cos.
+    angles = Vector4( radiansXYZ, 0.0f ).get128();
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    // Z0/Z1 interleave the z and w lanes of the cos/sin vectors
+    // (presumably vec_mergel merges the upper two lanes -- confirm against its definition).
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+	__declspec(align(16)) unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    // Clear the w lane of Z1.
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
+    // Y0 = ( c.y, c.y, -s.y, -s.x ), Y1 = ( s.y, s.y, c.y, c.x ).
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
+    // Broadcast sin(x) and cos(x).
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_mul( Z0, Y1 );
+    return Transform3(
+        Vector3( vec_mul( Z0, Y0 ) ),
+        Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
+        Vector3( 0.0f )
+    );
+}
+
+// Float convenience overload of the axis-angle rotation: forwards to the floatInVec version.
+inline const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec )
+{
+    const floatInVec angle( radians );
+    return rotation( angle, unitVec );
+}
+
+// Axis-angle rotation: takes the 3x3 part from Matrix3::rotation and pairs it
+// with a zero translation.
+inline const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{
+    const Matrix3 rot = Matrix3::rotation( radians, unitVec );
+    return Transform3( rot, Vector3( 0.0f ) );
+}
+
+// Rotation from a quaternion: converts it to a Matrix3 and pairs it with a zero translation.
+inline const Transform3 Transform3::rotation( const Quat &unitQuat )
+{
+    const Matrix3 rot = Matrix3( unitQuat );
+    return Transform3( rot, Vector3( 0.0f ) );
+}
+
+// Scale transform: column i keeps only lane i of scaleVec (other lanes zero),
+// for i = x, y, z; the fourth column is zero.
+inline const Transform3 Transform3::scale( const Vector3 &scaleVec )
+{
+    __declspec(align(16)) unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    __declspec(align(16)) unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __declspec(align(16)) unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    const __m128 zeros = _mm_setzero_ps();
+    const __m128 col0 = vec_sel( zeros, scaleVec.get128(), maskX );
+    const __m128 col1 = vec_sel( zeros, scaleVec.get128(), maskY );
+    const __m128 col2 = vec_sel( zeros, scaleVec.get128(), maskZ );
+    return Transform3( Vector3( col0 ), Vector3( col1 ), Vector3( col2 ), Vector3( 0.0f ) );
+}
+
+// Scales columns 0, 1, 2 of tfrm by the x, y, z components of scaleVec
+// respectively; column 3 is passed through unchanged.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec )
+{
+    const Vector3 scaled0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = tfrm.getCol2() * scaleVec.getZ( );
+    return Transform3( scaled0, scaled1, scaled2, tfrm.getCol3() );
+}
+
+// Multiplies every column of tfrm, including column 3, element-wise by scaleVec.
+inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 scaled0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 scaled3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Translation transform: x, y, z axis columns with translateVec as column 3.
+inline const Transform3 Transform3::translation( const Vector3 &translateVec )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Transform3( axisX, axisY, axisZ, translateVec );
+}
+
+// Column-wise select between tfrm0 and tfrm1, delegating to the Vector3
+// select with the same bool flag.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    const Vector3 pick0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 pick1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 pick2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 pick3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( pick0, pick1, pick2, pick3 );
+}
+
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 )
+{
+    // Column-wise choice between tfrm0 and tfrm1 using the boolInVec overload of the Vector3 select().
+    const Vector3 chosenCol0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 chosenCol1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 chosenCol2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 chosenCol3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( chosenCol0, chosenCol1, chosenCol2, chosenCol3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Transform3 & tfrm )
+{
+    // Debug helper: prints rows 0, 1 and 2 of the transform, in that order.
+    for ( int row = 0; row < 3; ++row )
+        print( tfrm.getRow( row ) );
+}
+
+inline void print( const Transform3 & tfrm, const char * name )
+{   // Debug helper: writes "<name>:" on its own line, then the transform via print( tfrm ).
+    printf("%s:\n", name);
+    print( tfrm );
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm )
+{   // Matrix -> quaternion without branches: all four candidate results are computed, then one is selected at the end.
+    __m128 res;
+    __m128 col0, col1, col2;
+    __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    __m128 zy_xz_yx, yz_zx_xy, sum, diff;
+    __m128 radicand, invSqrt, scale;
+    __m128 res0, res1, res2, res3;
+    __m128 xx, yy, zz;
+	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};   // lane masks: vec_sel( a, b, m ) takes b in the lanes where m is set
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+	__declspec(align(16)) unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+
+    col0 = tfrm.getCol0().get128();
+    col1 = tfrm.getCol1().get128();
+    col2 = tfrm.getCol2().get128();
+
+    /* four candidate results, selected at the bottom of this function: */
+    /* res3: trace ( xx + yy + zz ) > 0 */
+    /* otherwise, by largest diagonal element: */
+    /*    res0: xx is the largest diagonal element (default) */
+    /*    res1: yy > xx */
+    /*    res2: zz > xx and zz > yy */
+
+    /* compute the quaternion for each case */
+
+    xx_yy = vec_sel( col0, col1, select_y );   // lane 0 = xx (from col0), lane 1 = yy (from col1)
+    // AltiVec original: xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
+    // AltiVec original: yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
+    // AltiVec original: zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
+    xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) );   // ( xx, yy, xx, xx )
+    xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // lane 2 replaced by col2's z -> ( xx, yy, zz, xx )
+    yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) );   // ( yy, zz, xx, yy )
+    zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) );   // ( zz, xx, yy, zz )
+
+    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );    // every lane: xx + yy + zz (the trace)
+    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );   // ( xx-yy-zz, yy-zz-xx, zz-xx-yy, xx-yy-zz )
+    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) );   // lanes 0..2: 1 + diagDiff, lane 3: 1 + trace
+    invSqrt = rsqrtf4( radicand );   // reciprocal square root per lane (rsqrtf4 is defined outside this chunk)
+
+    zy_xz_yx = vec_sel( col0, col1, select_z );                                 // lanes written as <column><element>: 00 01 12 03
+    // AltiVec original: zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
+	zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) );      // zy_xz_yx = 12 12 01 00
+    zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y );               // zy_xz_yx = 12 20 01 00
+    yz_zx_xy = vec_sel( col0, col1, select_x );                                 // yz_zx_xy = 10 01 02 03
+    // AltiVec original: yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
+	yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) );      // yz_zx_xy = 10 02 10 10
+	yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x );               // yz_zx_xy = 21 02 10 10
+
+    sum = vec_add( zy_xz_yx, yz_zx_xy );    // off-diagonal sums; only lanes 0..2 are consumed below
+    diff = vec_sub( zy_xz_yx, yz_zx_xy );   // off-diagonal differences; only lanes 0..2 are consumed below
+
+    scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) );   // 0.5 / sqrt( radicand ), one factor per case
+
+    // AltiVec original: res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
+	res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) );   // ( sum.x, sum.z, sum.y, sum.x )
+	res0 = vec_sel( res0, vec_splat(diff, 0), select_w );  // lane 3 = diff.x, matching the XZYA permute
+    // AltiVec original: res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
+	res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) );   // ( sum.z, sum.x, sum.x, sum.x )
+	res1 = vec_sel( res1, vec_splat(diff, 1), select_w );  // lane 3 = diff.y, matching the ZXXB permute
+    // AltiVec original: res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
+	res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) );   // ( sum.y, sum.x, sum.x, sum.x )
+	res2 = vec_sel( res2, vec_splat(diff, 2), select_w );  // lane 3 = diff.z, matching the YXXC permute
+    res3 = diff;
+    res0 = vec_sel( res0, radicand, select_x );   // each candidate takes the radicand in its own lane...
+    res1 = vec_sel( res1, radicand, select_y );
+    res2 = vec_sel( res2, radicand, select_z );
+    res3 = vec_sel( res3, radicand, select_w );
+    res0 = vec_mul( res0, vec_splat( scale, 0 ) );   // ...and is multiplied by the scale factor of that same lane
+    res1 = vec_mul( res1, vec_splat( scale, 1 ) );
+    res2 = vec_mul( res2, vec_splat( scale, 2 ) );
+    res3 = vec_mul( res3, vec_splat( scale, 3 ) );
+
+    /* determine case and select answer (later selects override earlier ones) */
+
+    xx = vec_splat( col0, 0 );
+    yy = vec_splat( col1, 1 );
+    zz = vec_splat( col2, 2 );
+    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );                                      // res1 when yy > xx, else res0
+    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );       // res2 when zz exceeds both xx and yy
+    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) );    // res3 whenever the trace is positive
+    mVec128 = res;
+}
+
+inline const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 )
+{
+    // Outer product: column j of the result is tfrm0 multiplied by element j of tfrm1.
+    const Vector3 col0 = tfrm0 * tfrm1.getX( );
+    const Vector3 col1 = tfrm0 * tfrm1.getY( );
+    const Vector3 col2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+inline const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 )
+{
+    // Outer product: column j of the result is tfrm0 multiplied by element j of tfrm1.
+    const Vector4 col0 = tfrm0 * tfrm1.getX( );
+    const Vector4 col1 = tfrm0 * tfrm1.getY( );
+    const Vector4 col2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 col3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat )
+{   // Row vector times matrix: mat is transposed in registers, then the rows are combined weighted by vec's x, y and z.
+    __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res;
+    __m128 xxxx, yyyy, zzzz;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );   // ( c0.x, c2.x, c0.y, c2.y ) - assumes vec_mergeh interleaves lanes 0-1 like its AltiVec namesake
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );   // ( c0.z, c2.z, c0.w, c2.w ) - assumes vec_mergel interleaves lanes 2-3
+    xxxx = vec_splat( vec.get128(), 0 );
+    mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );                    // ( c0.x, c1.x, c2.x, c1.y ): the x elements of the three columns
+    // AltiVec original: mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));             // ( tmp0[2], tmp0[2], tmp0[3], tmp0[0] ) = ( c0.y, c0.y, c2.y, c0.x )
+	mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y);              // lane 1 = c1.y -> ( c0.y, c1.y, c2.y, c0.x )
+    // AltiVec original: mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));             // ( tmp1[0], tmp1[1], tmp1[1], tmp1[0] ) = ( c0.z, c2.z, c2.z, c0.z )
+	mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y);   // lane 1 = c1.z -> ( c0.z, c1.z, c2.z, c0.z )
+    yyyy = vec_splat( vec.get128(), 1 );
+    res = vec_mul( mcol0, xxxx );
+    zzzz = vec_splat( vec.get128(), 2 );
+    res = vec_madd( mcol1, yyyy, res );
+    res = vec_madd( mcol2, zzzz, res );   // lane j (j = 0..2) now holds cj.x*vec.x + cj.y*vec.y + cj.z*vec.z
+    return Vector3( res );
+}
+
+inline const Matrix3 crossMatrix( const Vector3 &vec )
+{   // Builds the matrix with columns ( 0, z, -y ), ( -z, 0, x ), ( y, -x, 0 ), so that multiplying it by w gives cross( vec, w ).
+    __m128 neg, res0, res1, res2;
+    neg = negatef4( vec.get128() );
+	__declspec(align(16)) unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	__declspec(align(16)) unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	__declspec(align(16)) unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    // AltiVec original: res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
+	res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) );   // ( x, z, z, x )
+	res0 = vec_sel(res0, vec_splat(neg, 1), select_z);                            // ( x, z, -y, x )
+    // AltiVec original: res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
+	res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x);      // ( -z, x, x, x )
+    // AltiVec original: res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
+	res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) );   // ( y, y, x, x )
+	res2 = vec_sel(res2, vec_splat(neg, 0), select_y);                            // ( y, -x, x, x )
+	__declspec(align(16)) unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff};   // AND masks that zero one lane each
+	__declspec(align(16)) unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff};
+	__declspec(align(16)) unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff};
+    res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) );   // ( 0, z, -y, x )
+    res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) );   // ( -z, 0, x, x )
+    res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // ( y, -x, 0, x ). TODO: Use selects?
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+inline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat )
+{   // Column j of the result is cross( vec, column j of mat ).
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h
index c91f5db41..a7cd4e145 100644
--- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/quat_aos.h
@@ -1,533 +1,533 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#ifndef _VECTORMATH_QUAT_AOS_CPP_H
-#define _VECTORMATH_QUAT_AOS_CPP_H
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Quat::Quat( float _x, float _y, float _z, float _w )
-{
-    mVec128 = _mm_setr_ps(_x, _y, _z, _w);
-}
-
-inline Quat::Quat( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
-{
-	mVec128 = _mm_unpacklo_ps(
-		_mm_unpacklo_ps( _x.get128(), _z.get128() ),
-		_mm_unpacklo_ps( _y.get128(), _w.get128() ) );
-}
-
-inline Quat::Quat( const Vector3 &xyz, float _w )
-{
-    mVec128 = xyz.get128();
-    _vmathVfSetElement(mVec128, _w, 3);
-}
-
-inline Quat::Quat( const Vector3 &xyz, const floatInVec &_w )
-{
-    mVec128 = xyz.get128();
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-}
-
-inline Quat::Quat( const Vector4 &vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Quat::Quat( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Quat::Quat( const floatInVec &scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Quat::Quat( __m128 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 )
-{
-    return lerp( floatInVec(t), quat0, quat1 );
-}
-
-inline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 )
-{
-    return slerp( floatInVec(t), unitQuat0, unitQuat1 );
-}
-
-inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 )
-{
-    Quat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
-    selectMask = (vec_uint4)vec_cmpgt( _mm_setzero_ps(), cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );
-    selectMask = (vec_uint4)vec_cmpgt( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = vec_sub( _mm_set1_ps(1.0f), tttt );
-    angles = vec_mergeh( _mm_set1_ps(1.0f), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, _mm_setzero_ps() );
-    sines = sinf4( angles );
-    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Quat( vec_madd( start.get128(), scale0, vec_mul( unitQuat1.get128(), scale1 ) ) );
-}
-
-inline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 )
-{
-    return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );
-}
-
-inline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 )
-{
-    return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), slerp( t, unitQuat0, unitQuat3 ), slerp( t, unitQuat1, unitQuat2 ) );
-}
-
-inline __m128 Quat::get128( ) const
-{
-    return mVec128;
-}
-
-inline Quat & Quat::operator =( const Quat &quat )
-{
-    mVec128 = quat.mVec128;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( const Vector3 &vec )
-{
-	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff};
-	mVec128 = vec_sel( vec.get128(), mVec128, sw );
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Quat & Quat::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Quat & Quat::setX( const floatInVec &_x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Quat::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Quat & Quat::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Quat & Quat::setY( const floatInVec &_y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Quat::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Quat & Quat::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Quat & Quat::setZ( const floatInVec &_z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Quat::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Quat & Quat::setW( float _w )
-{
-    _vmathVfSetElement(mVec128, _w, 3);
-    return *this;
-}
-
-inline Quat & Quat::setW( const floatInVec &_w )
-{
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-    return *this;
-}
-
-inline const floatInVec Quat::getW( ) const
-{
-    return floatInVec( mVec128, 3 );
-}
-
-inline Quat & Quat::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Quat & Quat::setElem( int idx, const floatInVec &value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Quat::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Quat::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Quat::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Quat Quat::operator +( const Quat &quat ) const
-{
-    return Quat( _mm_add_ps( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator -( const Quat &quat ) const
-{
-    return Quat( _mm_sub_ps( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Quat Quat::operator *( const floatInVec &scalar ) const
-{
-    return Quat( _mm_mul_ps( mVec128, scalar.get128() ) );
-}
-
-inline Quat & Quat::operator +=( const Quat &quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( const Quat &quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( const floatInVec &scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Quat Quat::operator /( const floatInVec &scalar ) const
-{
-    return Quat( _mm_div_ps( mVec128, scalar.get128() ) );
-}
-
-inline Quat & Quat::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Quat & Quat::operator /=( const floatInVec &scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-	return Quat(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
-}
-
-inline const Quat operator *( float scalar, const Quat &quat )
-{
-    return floatInVec(scalar) * quat;
-}
-
-inline const Quat operator *( const floatInVec &scalar, const Quat &quat )
-{
-    return quat * scalar;
-}
-
-inline const floatInVec dot( const Quat &quat0, const Quat &quat1 )
-{
-    return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
-}
-
-inline const floatInVec norm( const Quat &quat )
-{
-    return floatInVec(  _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
-}
-
-inline const floatInVec length( const Quat &quat )
-{
-    return floatInVec(  _mm_sqrt_ps(_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
-}
-
-inline const Quat normalize( const Quat &quat )
-{
-    return Quat( _mm_mul_ps( quat.get128(), _mm_rsqrt_ps( _vmathVfDot4( quat.get128(), quat.get128() ) ) ) );
-}
-
-inline const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 )
-{
-    Vector3 crossVec;
-    __m128 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngleX2Plus2 = vec_madd( cosAngle, _mm_set1_ps(2.0f), _mm_set1_ps(2.0f) );
-    recipCosHalfAngleX2 = _mm_rsqrt_ps( cosAngleX2Plus2 );
-    cosHalfAngleX2 = vec_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
-    crossVec = cross( unitVec0, unitVec1 );
-    res = vec_mul( crossVec.get128(), recipCosHalfAngleX2 );
-	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff};
-    res = vec_sel( res, vec_mul( cosHalfAngleX2, _mm_set1_ps(0.5f) ), sw );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotation( float radians, const Vector3 &unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unitVec )
-{
-    __m128 s, c, angle, res;
-    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );
-    sincosf4( angle, &s, &c );
-	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff};
-    res = vec_sel( vec_mul( unitVec.get128(), s ), c, sw );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationX( const floatInVec &radians )
-{
-    __m128 s, c, angle, res;
-    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );
-    sincosf4( angle, &s, &c );
-	__declspec(align(16)) unsigned int xsw[4] = {0xffffffff, 0, 0, 0};
-	__declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff};
-    res = vec_sel( _mm_setzero_ps(), s, xsw );
-    res = vec_sel( res, c, wsw );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationY( const floatInVec &radians )
-{
-    __m128 s, c, angle, res;
-    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );
-    sincosf4( angle, &s, &c );
-	__declspec(align(16)) unsigned int ysw[4] = {0, 0xffffffff, 0, 0};
-	__declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff};
-    res = vec_sel( _mm_setzero_ps(), s, ysw );
-    res = vec_sel( res, c, wsw );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationZ( const floatInVec &radians )
-{
-    __m128 s, c, angle, res;
-    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );
-    sincosf4( angle, &s, &c );
-	__declspec(align(16)) unsigned int zsw[4] = {0, 0, 0xffffffff, 0};
-	__declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff};
-    res = vec_sel( _mm_setzero_ps(), s, zsw );
-    res = vec_sel( res, c, wsw );
-    return Quat( res );
-}
-
-inline const Quat Quat::operator *( const Quat &quat ) const
-{
-    __m128 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    __m128 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = mVec128;
-    rdata = quat.mVec128;
-    tmp0 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,0,2,1) );
-    tmp1 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,1,0,2) );
-    tmp2 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,1,0,2) );
-    tmp3 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,0,2,1) );
-    qv = vec_mul( vec_splat( ldata, 3 ), rdata );
-    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_mul( ldata, rdata );
-    l_wxyz = vec_sld( ldata, ldata, 12 );
-    r_wxyz = vec_sld( rdata, rdata, 12 );
-    qw = vec_nmsub( l_wxyz, r_wxyz, product );
-    xy = vec_madd( l_wxyz, r_wxyz, product );
-    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );
-	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff};
-    return Quat( vec_sel( qv, qw, sw ) );
-}
-
-inline Quat & Quat::operator *=( const Quat &quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( const Quat &quat, const Vector3 &vec )
-{    __m128 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat.get128();
-    vdata = vec.get128();
-    tmp0 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,0,2,1) );
-    tmp1 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,1,0,2) );
-    tmp2 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,1,0,2) );
-    tmp3 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,0,2,1) );
-    wwww = vec_splat( qdata, 3 );
-    qv = vec_mul( wwww, vdata );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_mul( qdata, vdata );
-    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
-    qw = vec_add( vec_sld( product, product, 8 ), qw );
-    tmp1 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,1,0,2) );
-    tmp3 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,0,2,1) );
-    res = vec_mul( vec_splat( qw, 0 ), qdata );
-    res = vec_madd( wwww, qv, res );
-    res = vec_madd( tmp0, tmp1, res );
-    res = vec_nmsub( tmp2, tmp3, res );
-    return Vector3( res );
-}
-
-inline const Quat conj( const Quat &quat )
-{
-	__declspec(align(16)) unsigned int sw[4] = {0x80000000,0x80000000,0x80000000,0};
-    return Quat( vec_xor( quat.get128(), _mm_load_ps((float *)sw) ) );
-}
-
-inline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 )
-{
-    return select( quat0, quat1, boolInVec(select1) );
-}
-
-inline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 )
-{
-    return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Quat &quat )
-{
-    union { __m128 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( const Quat &quat, const char * name )
-{
-    union { __m128 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Quat::Quat( float _x, float _y, float _z, float _w )
+{   // _mm_setr_ps stores its arguments in lane order, so the register holds ( x, y, z, w ).
+    mVec128 = _mm_setr_ps(_x, _y, _z, _w);
+}
+
+inline Quat::Quat( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
+{   // Gathers lane 0 of each argument into ( x, y, z, w ) with two levels of low-lane interleaving.
+	mVec128 = _mm_unpacklo_ps(
+		_mm_unpacklo_ps( _x.get128(), _z.get128() ),     // ( x0, z0, x1, z1 )
+		_mm_unpacklo_ps( _y.get128(), _w.get128() ) );   // ( y0, w0, y1, w1 ); interleaved with the line above -> ( x0, y0, z0, w0 )
+}
+
+inline Quat::Quat( const Vector3 &xyz, float _w )
+{   // Starts from the register of xyz, then element 3 is overwritten with _w.
+    mVec128 = xyz.get128();
+    _vmathVfSetElement(mVec128, _w, 3);   // helper defined outside this chunk; presumably stores _w into element 3 in place
+}
+
+inline Quat::Quat( const Vector3 &xyz, const floatInVec &_w )
+{   // Starts from the register of xyz, then element 3 is replaced using _w.
+    mVec128 = xyz.get128();
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);   // helper defined outside this chunk; presumably returns mVec128 with element 3 taken from _w
+}
+
+inline Quat::Quat( const Vector4 &vec )
+{   // Copies the 128-bit register of vec unchanged; no normalization is performed.
+    mVec128 = vec.get128();
+}
+
+inline Quat::Quat( float scalar )
+{   // Takes the register of floatInVec(scalar); presumably every lane equals scalar (depends on floatInVec, defined elsewhere).
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Quat::Quat( const floatInVec &scalar )
+{   // Copies the register of scalar as-is; the lane contents are whatever the floatInVec holds.
+    mVec128 = scalar.get128();
+}
+
+inline Quat::Quat( __m128 vf4 )
+{   // Wraps a raw SSE register directly, without modification.
+    mVec128 = vf4;
+}
+
+inline const Quat Quat::identity( )
+{   // _VECTORMATH_UNIT_0001 is defined outside this chunk; by its name presumably the constant ( 0, 0, 0, 1 ).
+    return Quat( _VECTORMATH_UNIT_0001 );
+}
+
+inline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 )
+{   // Convenience overload: wraps t in a floatInVec and forwards to the floatInVec version.
+    return lerp( floatInVec(t), quat0, quat1 );
+}
+
+inline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 )
+{   // Component-wise linear interpolation quat0 + ( quat1 - quat0 ) * t; the result is not renormalized here.
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 )
+{   // Convenience overload: wraps t in a floatInVec and forwards to the floatInVec version.
+    return slerp( floatInVec(t), unitQuat0, unitQuat1 );
+}
+
+inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 )
+{   // Spherical interpolation from unitQuat0 to unitQuat1; uses plain ( 1-t, t ) weights once cosAngle reaches _VECTORMATH_SLERP_TOL.
+    Quat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );   // 4-component dot product (helper defined outside this chunk)
+    selectMask = (vec_uint4)vec_cmpgt( _mm_setzero_ps(), cosAngle );     // set where cosAngle < 0
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );    // make cosAngle non-negative
+    start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );   // negate unitQuat0 when the dot product was negative
+    selectMask = (vec_uint4)vec_cmpgt( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );           // set where cosAngle < tolerance: the sine weights are used
+    angle = acosf4( cosAngle );
+    tttt = t.get128();   // NOTE(review): the lane comments below assume t is replicated across all lanes - confirm
+    oneMinusT = vec_sub( _mm_set1_ps(1.0f), tttt );
+    angles = vec_mergeh( _mm_set1_ps(1.0f), tttt );   // ( 1, t, 1, t ), assuming vec_mergeh interleaves the low lanes
+    angles = vec_mergeh( angles, oneMinusT );         // ( 1, 1-t, t, 1-t )
+    angles = vec_madd( angles, angle, _mm_setzero_ps() );   // ( a, (1-t)a, t*a, (1-t)a ) with a = angle
+    sines = sinf4( angles );
+    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );    // each sine divided by sin( angle )
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );   // sin( (1-t)a ) / sin( a ), or 1-t where the mask is clear
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );        // sin( t*a ) / sin( a ), or t where the mask is clear
+    return Quat( vec_madd( start.get128(), scale0, vec_mul( unitQuat1.get128(), scale1 ) ) );   // start * scale0 + unitQuat1 * scale1
+}
+
+inline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 )
+{   // Convenience overload: wraps t in a floatInVec and forwards to the floatInVec version.
+    return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );
+}
+
+inline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 )
+{   // Slerps between slerp( t, q0, q3 ) and slerp( t, q1, q2 ) using the blend parameter 2t( 1 - t ).
+    return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), slerp( t, unitQuat0, unitQuat3 ), slerp( t, unitQuat1, unitQuat2 ) );
+}
+
+inline __m128 Quat::get128( ) const
+{   // Returns the underlying SSE register by value.
+    return mVec128;
+}
+
+inline Quat & Quat::operator =( const Quat &quat )
+{   // Copies all four lanes from quat and returns *this so assignments can be chained.
+    mVec128 = quat.mVec128;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( const Vector3 &vec )
+{   // Replaces x, y and z with those of vec while keeping the current w.
+	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff};   // mask with only lane 3 (w) set
+	mVec128 = vec_sel( vec.get128(), mVec128, sw );                     // lanes 0..2 from vec, lane 3 from the old value
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{   // Wraps the same register in a Vector3; no lanes are modified.
+    return Vector3( mVec128 );
+}
+
+inline Quat & Quat::setX( float _x )
+{   // Writes _x into element 0 (x) via _vmathVfSetElement (defined outside this chunk) and returns *this for chaining.
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Quat & Quat::setX( const floatInVec &_x )
+{   // Replaces element 0 (x) using _vmathVfInsert (defined outside this chunk) and returns *this for chaining.
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Quat::getX( ) const
+{   // Returns the x component as a floatInVec built from mVec128 and element index 0.
+    return floatInVec( mVec128, 0 );
+}
+
+inline Quat & Quat::setY( float _y )
+{   // Writes _y into element 1 (y) via _vmathVfSetElement (defined outside this chunk) and returns *this for chaining.
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Quat & Quat::setY( const floatInVec &_y )
+{   // Replaces element 1 (y) using _vmathVfInsert (defined outside this chunk) and returns *this for chaining.
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Quat::getY( ) const
+{   // Returns the y component as a floatInVec built from mVec128 and element index 1.
+    return floatInVec( mVec128, 1 );
+}
+
+inline Quat & Quat::setZ( float _z )
+{   // Writes _z into element 2 (z) via _vmathVfSetElement (defined outside this chunk) and returns *this for chaining.
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Quat & Quat::setZ( const floatInVec &_z )
+{   // Replaces element 2 (z) using _vmathVfInsert (defined outside this chunk) and returns *this for chaining.
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Quat::getZ( ) const
+{   // Returns the z component as a floatInVec built from mVec128 and element index 2.
+    return floatInVec( mVec128, 2 );
+}
+
+inline Quat & Quat::setW( float _w )
+{   // Writes _w into element 3 (w) via _vmathVfSetElement (defined outside this chunk) and returns *this for chaining.
+    _vmathVfSetElement(mVec128, _w, 3);
+    return *this;
+}
+
+inline Quat & Quat::setW( const floatInVec &_w )
+{   // Replaces element 3 (w) using _vmathVfInsert (defined outside this chunk) and returns *this for chaining.
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+    return *this;
+}
+
+inline const floatInVec Quat::getW( ) const
+{   // Returns the w component as a floatInVec built from mVec128 and element index 3.
+    return floatInVec( mVec128, 3 );
+}
+
+inline Quat & Quat::setElem( int idx, float value )
+{   // Writes value into element idx via _vmathVfSetElement; idx is not range-checked in this function.
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Quat & Quat::setElem( int idx, const floatInVec &value ) // Sets slot idx from a floatInVec and returns *this; idx is not range-checked here.
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); // copies slot idx of value's register into slot idx of ours
+    return *this;
+}
+
+inline const floatInVec Quat::getElem( int idx ) const // Builds a floatInVec from this register and slot index idx.
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Quat::operator []( int idx ) // Returns a VecIdx proxy built from this register and idx, so the element can be assigned through it.
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Quat::operator []( int idx ) const // Read-only subscript: same expression as getElem(idx).
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline const Quat Quat::operator +( const Quat &quat ) const // Lane-wise sum of the two quaternions.
+{
+    return Quat( _mm_add_ps( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator -( const Quat &quat ) const // Lane-wise difference: this minus quat.
+{
+    return Quat( _mm_sub_ps( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator *( float scalar ) const // Scalar multiply; wraps the float and forwards to the floatInVec overload.
+{
+    return *this * floatInVec(scalar);
+}
+
+inline const Quat Quat::operator *( const floatInVec &scalar ) const // Lane-wise multiply by scalar's register.
+{
+    return Quat( _mm_mul_ps( mVec128, scalar.get128() ) ); // presumably scalar holds the same value in every lane -- confirm in floatInVec
+}
+
+inline Quat & Quat::operator +=( const Quat &quat ) // Adds quat to this quaternion in place and returns *this.
+{
+    const Quat sum( *this + quat );
+    return ( *this = sum );
+}
+
+inline Quat & Quat::operator -=( const Quat &quat ) // Subtracts quat from this quaternion in place and returns *this.
+{
+    const Quat difference( *this - quat );
+    return ( *this = difference );
+}
+
+inline Quat & Quat::operator *=( float scalar ) // Scales this quaternion in place by a float and returns *this.
+{
+    const Quat scaled( *this * scalar );
+    return ( *this = scaled );
+}
+
+inline Quat & Quat::operator *=( const floatInVec &scalar ) // Scales this quaternion in place by a floatInVec and returns *this.
+{
+    const Quat scaled( *this * scalar );
+    return ( *this = scaled );
+}
+
+inline const Quat Quat::operator /( float scalar ) const // Scalar divide; wraps the float and forwards to the floatInVec overload.
+{
+    return *this / floatInVec(scalar);
+}
+
+inline const Quat Quat::operator /( const floatInVec &scalar ) const // Lane-wise divide by scalar's register; no zero check is made here.
+{
+    return Quat( _mm_div_ps( mVec128, scalar.get128() ) );
+}
+
+inline Quat & Quat::operator /=( float scalar ) // Divides this quaternion in place by a float and returns *this.
+{
+    const Quat quotient( *this / scalar );
+    return ( *this = quotient );
+}
+
+inline Quat & Quat::operator /=( const floatInVec &scalar ) // Divides this quaternion in place by a floatInVec and returns *this.
+{
+    const Quat quotient( *this / scalar );
+    return ( *this = quotient );
+}
+
+inline const Quat Quat::operator -( ) const // Negation, computed as zero minus every lane.
+{
+	return Quat(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
+}
+
+inline const Quat operator *( float scalar, const Quat &quat ) // Scalar-on-the-left multiply; wraps the float and forwards to the floatInVec overload.
+{
+    return floatInVec(scalar) * quat;
+}
+
+inline const Quat operator *( const floatInVec &scalar, const Quat &quat ) // Scalar-on-the-left multiply; simply swaps the operands.
+{
+    return quat * scalar;
+}
+
+inline const floatInVec dot( const Quat &quat0, const Quat &quat1 ) // Four-component dot product of the two quaternions.
+{
+    return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 ); // _vmathVfDot4 (vec_aos.h) leaves the sum in every lane; slot 0 is passed on
+}
+
+inline const floatInVec norm( const Quat &quat ) // Dot product of quat with itself, i.e. the squared length (no square root is taken).
+{
+    return floatInVec(  _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
+}
+
+inline const floatInVec length( const Quat &quat ) // Square root of the four-component dot product of quat with itself.
+{
+    return floatInVec(  _mm_sqrt_ps(_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
+}
+
+inline const Quat normalize( const Quat &quat ) // Returns quat scaled to unit length; a zero quaternion yields NaN lanes (0/0).
+{
+    return Quat( _mm_div_ps( quat.get128(), _mm_sqrt_ps( _vmathVfDot4( quat.get128(), quat.get128() ) ) ) ); // exact sqrt+div instead of the ~12-bit _mm_rsqrt_ps estimate, so the result agrees with length()
+}
+
+inline const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 ) // Builds the quaternion taking unitVec0 to unitVec1; both inputs are expected to be unit length.
+{
+    Vector3 crossVec;
+    __m128 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); // dot of unit vectors = cos(angle), present in every lane
+    cosAngleX2Plus2 = vec_madd( cosAngle, _mm_set1_ps(2.0f), _mm_set1_ps(2.0f) ); // 2*cos + 2 (per the variable name; assumes vec_madd(a,b,c) is a*b + c)
+    recipCosHalfAngleX2 = _mm_rsqrt_ps( cosAngleX2Plus2 ); // NOTE(review): rsqrt is an estimate, and the input is 0 when the vectors are opposite -- confirm callers avoid that case
+    cosHalfAngleX2 = vec_mul( recipCosHalfAngleX2, cosAngleX2Plus2 ); // x * (1/sqrt(x)) ~= sqrt(x)
+    crossVec = cross( unitVec0, unitVec1 );
+    res = vec_mul( crossVec.get128(), recipCosHalfAngleX2 ); // vector part: cross product scaled by the reciprocal
+	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // mask with only the w lane set
+    res = vec_sel( res, vec_mul( cosHalfAngleX2, _mm_set1_ps(0.5f) ), sw ); // w lane <- half of cosHalfAngleX2 (assuming AltiVec-style select)
+    return Quat( res );
+}
+
+inline const Quat Quat::rotation( float radians, const Vector3 &unitVec ) // Angle/axis rotation; wraps the float and forwards to the floatInVec overload.
+{
+    return rotation( floatInVec(radians), unitVec );
+}
+
+inline const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unitVec ) // Angle/axis rotation: vector part is unitVec * sin(radians/2), w is cos(radians/2).
+{
+    __m128 s, c, angle, res;
+    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); // half angle
+    sincosf4( angle, &s, &c ); // presumably fills s and c with sine and cosine of angle -- helper is defined elsewhere
+	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // mask with only the w lane set
+    res = vec_sel( vec_mul( unitVec.get128(), s ), c, sw ); // x,y,z from axis*s, w from c (assuming AltiVec-style select)
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationX( float radians ) // Wraps the float and forwards to the floatInVec overload.
+{
+    return rotationX( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationX( const floatInVec &radians ) // Rotation about the x axis: starts from zero, then fills the x lane from s and the w lane from c.
+{
+    __m128 s, c, angle, res;
+    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); // half angle
+    sincosf4( angle, &s, &c ); // presumably sine and cosine of angle -- helper is defined elsewhere
+	__declspec(align(16)) unsigned int xsw[4] = {0xffffffff, 0, 0, 0}; // mask: x lane only
+	__declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; // mask: w lane only
+    res = vec_sel( _mm_setzero_ps(), s, xsw ); // (s, 0, 0, 0), assuming AltiVec-style select
+    res = vec_sel( res, c, wsw ); // (s, 0, 0, c)
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationY( float radians ) // Wraps the float and forwards to the floatInVec overload.
+{
+    return rotationY( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationY( const floatInVec &radians ) // Rotation about the y axis: starts from zero, then fills the y lane from s and the w lane from c.
+{
+    __m128 s, c, angle, res;
+    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); // half angle
+    sincosf4( angle, &s, &c ); // presumably sine and cosine of angle -- helper is defined elsewhere
+	__declspec(align(16)) unsigned int ysw[4] = {0, 0xffffffff, 0, 0}; // mask: y lane only
+	__declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; // mask: w lane only
+    res = vec_sel( _mm_setzero_ps(), s, ysw ); // (0, s, 0, 0), assuming AltiVec-style select
+    res = vec_sel( res, c, wsw ); // (0, s, 0, c)
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationZ( float radians ) // Wraps the float and forwards to the floatInVec overload.
+{
+    return rotationZ( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationZ( const floatInVec &radians ) // Rotation about the z axis: starts from zero, then fills the z lane from s and the w lane from c.
+{
+    __m128 s, c, angle, res;
+    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) ); // half angle
+    sincosf4( angle, &s, &c ); // presumably sine and cosine of angle -- helper is defined elsewhere
+	__declspec(align(16)) unsigned int zsw[4] = {0, 0, 0xffffffff, 0}; // mask: z lane only
+	__declspec(align(16)) unsigned int wsw[4] = {0, 0, 0, 0xffffffff}; // mask: w lane only
+    res = vec_sel( _mm_setzero_ps(), s, zsw ); // (0, 0, s, 0), assuming AltiVec-style select
+    res = vec_sel( res, c, wsw ); // (0, 0, s, c)
+    return Quat( res );
+}
+
+inline const Quat Quat::operator *( const Quat &quat ) const // Quaternion product with *this as the left operand and quat as the right.
+{
+    __m128 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3; // NOTE(review): comments below assume the vec_* helpers follow AltiVec semantics -- confirm
+    __m128 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = mVec128;
+    rdata = quat.mVec128;
+    tmp0 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,0,2,1) ); // left  as (y, z, x, w)
+    tmp1 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,1,0,2) ); // right as (z, x, y, w)
+    tmp2 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,1,0,2) ); // left  as (z, x, y, w)
+    tmp3 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,0,2,1) ); // right as (y, z, x, w)
+    qv = vec_mul( vec_splat( ldata, 3 ), rdata ); // l.w * r
+    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv ); // + r.w * l
+    qv = vec_madd( tmp0, tmp1, qv ); // + first half of cross(l.xyz, r.xyz)
+    qv = vec_nmsub( tmp2, tmp3, qv ); // - second half of the cross product (vec_nmsub(a,b,c) taken as c - a*b)
+    product = vec_mul( ldata, rdata ); // lane-wise l * r
+    l_wxyz = vec_sld( ldata, ldata, 12 ); // lanes rotated by 12 bytes; the name suggests (w, x, y, z) order
+    r_wxyz = vec_sld( rdata, rdata, 12 );
+    qw = vec_nmsub( l_wxyz, r_wxyz, product );
+    xy = vec_madd( l_wxyz, r_wxyz, product );
+    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) ); // w lane should now hold l.w*r.w - dot(l.xyz, r.xyz) -- TODO confirm against vec_sld's definition
+	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // mask with only the w lane set
+    return Quat( vec_sel( qv, qw, sw ) ); // x, y, z taken from qv and w from qw
+}
+
+inline Quat & Quat::operator *=( const Quat &quat ) // Replaces *this with (*this * quat), keeping *this as the left operand, and returns *this.
+{
+    const Quat composed( *this * quat );
+    return ( *this = composed );
+}
+
+inline const Vector3 rotate( const Quat &quat, const Vector3 &vec ) // Applies quat's rotation to vec and returns the rotated vector.
+{    __m128 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res; // NOTE(review): comments below assume the vec_* helpers follow AltiVec semantics -- confirm
+    qdata = quat.get128();
+    vdata = vec.get128();
+    tmp0 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,0,2,1) ); // q as (y, z, x, w)
+    tmp1 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,1,0,2) ); // v as (z, x, y, w)
+    tmp2 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,1,0,2) ); // q as (z, x, y, w)
+    tmp3 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,0,2,1) ); // v as (y, z, x, w)
+    wwww = vec_splat( qdata, 3 ); // q.w in every lane
+    qv = vec_mul( wwww, vdata ); // q.w * v
+    qv = vec_madd( tmp0, tmp1, qv ); // + first half of cross(q.xyz, v)
+    qv = vec_nmsub( tmp2, tmp3, qv ); // - second half of the cross product
+    product = vec_mul( qdata, vdata ); // lane-wise q * v
+    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
+    qw = vec_add( vec_sld( product, product, 8 ), qw ); // lane 0 should now hold dot(q.xyz, v) -- TODO confirm against vec_sld's definition
+    tmp1 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,1,0,2) ); // qv as (z, x, y, w); reuses tmp1
+    tmp3 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,0,2,1) ); // qv as (y, z, x, w); reuses tmp3
+    res = vec_mul( vec_splat( qw, 0 ), qdata ); // dot * q
+    res = vec_madd( wwww, qv, res ); // + q.w * qv
+    res = vec_madd( tmp0, tmp1, res ); // + first half of cross(q.xyz, qv)
+    res = vec_nmsub( tmp2, tmp3, res ); // - second half of the cross product
+    return Vector3( res );
+}
+
+inline const Quat conj( const Quat &quat ) // Conjugate: flips the sign of x, y and z and leaves w untouched.
+{
+	__declspec(align(16)) unsigned int sw[4] = {0x80000000,0x80000000,0x80000000,0}; // IEEE-754 sign bit set for x, y, z; nothing for w
+    return Quat( vec_xor( quat.get128(), _mm_load_ps((float *)sw) ) ); // aligned load of the mask, then (presumably bitwise) XOR to toggle the sign bits
+}
+
+inline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 ) // Wraps the bool in a boolInVec and forwards to the boolInVec overload.
+{
+    return select( quat0, quat1, boolInVec(select1) );
+}
+
+inline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 ) // Branch-free choice between quat0 and quat1 using select1's register as the mask.
+{
+    return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) ); // presumably yields quat1 where the mask bits are set, quat0 elsewhere -- confirm in vec_sel
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat &quat ) // Debug helper: writes the four components to stdout on one line.
+{
+    float lanes[4];
+    _mm_storeu_ps( lanes, quat.get128() );
+    printf( "( %f %f %f %f )\n", lanes[0], lanes[1], lanes[2], lanes[3] );
+}
+
+inline void print( const Quat &quat, const char * name ) // Debug helper: writes "name: ( x y z w )" to stdout.
+{
+    float lanes[4];
+    _mm_storeu_ps( lanes, quat.get128() );
+    printf( "%s: ( %f %f %f %f )\n", name, lanes[0], lanes[1], lanes[2], lanes[3] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h
index 3557af08d..9ebecacff 100644
--- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vec_aos.h
@@ -1,1380 +1,1380 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_CPP_H
-#define _VECTORMATH_VEC_AOS_CPP_H
-
-//-----------------------------------------------------------------------------
-// Constants
-// for permutes words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
-#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
-#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
-#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
-#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
-#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
-#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
-#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
-#define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 )
-{
-    __m128 result = _mm_mul_ps( vec0, vec1);
-    return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) );
-}
-
-static inline __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 )
-{
-    __m128 result = _mm_mul_ps(vec0, vec1);
-	return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0,0,0,0)),
-			_mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(1,1,1,1)),
-			_mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2,2,2,2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3,3,3,3)))));
-}
-
-static inline __m128 _vmathVfCross( __m128 vec0, __m128 vec1 )
-{
-    __m128 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) );
-    tmp1 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) );
-    tmp2 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) );
-    tmp3 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) );
-    result = vec_mul( tmp0, tmp1 );
-    result = vec_nmsub( tmp2, tmp3, result );
-    return result;
-}
-/*
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v)
-{
-#if 0
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u);
-    const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu);
-    const vec_uint4 mergeSign = (vec_uint4)(0x00008000u);
-
-    sign = vec_sr((vec_uint4)v, (vec_uint4)16);
-    mant = vec_sr((vec_uint4)v, (vec_uint4)13);
-    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff);
-
-    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112);
-    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142);
-
-    bexp = _mm_add_ps(bexp, (vec_int4)-112);
-    bexp = vec_sl(bexp, (vec_uint4)10);
-
-    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = vec_sel((vec_uint4)(0), hfloat, notZero);
-    hfloat = vec_sel(hfloat, hfloatInf, isInf);
-    hfloat = vec_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-#else
-	assert(0);
-	return _mm_setzero_ps();
-#endif
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v)
-{
-#if 0
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
-#else
-	assert(0);
-	return _mm_setzero_si128();
-#endif
-}
-*/
-
-static inline __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
-{
-	SSEFloat s;
-	s.m128 = src;
-	SSEFloat d;
-	d.m128 = dst;
-	d.f[slot] = s.f[slot];
-	return d.m128;
-}
-
-#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
-
-static inline __m128 _vmathVfSplatScalar(float scalar)
-{
-	return _mm_set1_ps(scalar);
-}
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline VecIdx::operator floatInVec() const
-{
-    return floatInVec(ref, i);
-}
-
-inline float VecIdx::getAsFloat() const
-#else
-inline VecIdx::operator float() const
-#endif
-{
-    return ((float *)&ref)[i];
-}
-
-inline float VecIdx::operator =( float scalar )
-{
-    _vmathVfSetElement(ref, scalar, i);
-    return scalar;
-}
-
-inline floatInVec VecIdx::operator =( const floatInVec &scalar )
-{
-    ref = _vmathVfInsert(ref, scalar.get128(), i);
-    return scalar;
-}
-
-inline floatInVec VecIdx::operator =( const VecIdx& scalar )
-{
-    return *this = floatInVec(scalar.ref, scalar.i);
-}
-
-inline floatInVec VecIdx::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator *=( const floatInVec &scalar )
-{
-    return *this = floatInVec(ref, i) * scalar;
-}
-
-inline floatInVec VecIdx::operator /=( float scalar )
-{
-    return *this /= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator /=( const floatInVec &scalar )
-{
-    return *this = floatInVec(ref, i) / scalar;
-}
-
-inline floatInVec VecIdx::operator +=( float scalar )
-{
-    return *this += floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator +=( const floatInVec &scalar )
-{
-    return *this = floatInVec(ref, i) + scalar;
-}
-
-inline floatInVec VecIdx::operator -=( float scalar )
-{
-    return *this -= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator -=( const floatInVec &scalar )
-{
-    return *this = floatInVec(ref, i) - scalar;
-}
-
-inline Vector3::Vector3( float _x, float _y, float _z )
-{
-    mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
-}
-
-inline Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
-{
-	__m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() );
-	mVec128 = _mm_unpacklo_ps( xz, _y.get128() );
-}
-
-inline Vector3::Vector3( const Point3 &pnt )
-{
-    mVec128 = pnt.get128();
-}
-
-inline Vector3::Vector3( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Vector3::Vector3( const floatInVec &scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Vector3::Vector3( __m128 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 )
-{
-    return lerp( floatInVec(t), vec0, vec1 );
-}
-
-inline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
-{
-    return slerp( floatInVec(t), unitVec0, unitVec1 );
-}
-
-inline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
-{
-    __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
-    angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
-    angles = _mm_unpacklo_ps( angles, oneMinusT );		// angles = 1, 1-t, t, 1-t
-    angles = _mm_mul_ps( angles, angle );
-    sines = sinf4( angles );
-    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
-}
-
-inline __m128 Vector3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( const Vector3 &vec, __m128 * quad )
-{
-    __m128 dstVec = *quad;
-	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
-    dstVec = vec_sel(vec.get128(), dstVec, sw);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads )
-{
-	const float *quads = (float *)threeQuads;
-    vec0 = Vector3(  _mm_load_ps(quads) );
-    vec1 = Vector3( _mm_loadu_ps(quads + 3) );
-    vec2 = Vector3( _mm_loadu_ps(quads + 6) );
-    vec3 = Vector3( _mm_loadu_ps(quads + 9) );
-}
-
-inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads )
-{
-	__m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
-	__m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
-	__declspec(align(16)) unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
-	__declspec(align(16)) unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
-	threeQuads[0] = vec_sel( vec0.get128(), xxxx, xsw );
-    threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
-    threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
-}
-/*
-inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads )
-{
-	assert(0);
-#if 0
-    __m128 xyz0[3];
-    __m128 xyz1[3];
-    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-#endif
-}
-*/
-inline Vector3 & Vector3::operator =( const Vector3 &vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( const floatInVec &_x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Vector3::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Vector3 & Vector3::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Vector3 & Vector3::setY( const floatInVec &_y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Vector3::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Vector3 & Vector3::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Vector3 & Vector3::setZ( const floatInVec &_z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Vector3::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Vector3 & Vector3::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Vector3 & Vector3::setElem( int idx, const floatInVec &value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Vector3::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Vector3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Vector3::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector3 Vector3::operator +( const Vector3 &vec ) const
-{
-    return Vector3( _mm_add_ps( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector3 Vector3::operator -( const Vector3 &vec ) const
-{
-    return Vector3( _mm_sub_ps( mVec128, vec.mVec128 ) );
-}
-
-inline const Point3 Vector3::operator +( const Point3 &pnt ) const
-{
-    return Point3( _mm_add_ps( mVec128, pnt.get128() ) );
-}
-
-inline const Vector3 Vector3::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Vector3 Vector3::operator *( const floatInVec &scalar ) const
-{
-    return Vector3( _mm_mul_ps( mVec128, scalar.get128() ) );
-}
-
-inline Vector3 & Vector3::operator +=( const Vector3 &vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( const Vector3 &vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( const floatInVec &scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Vector3 Vector3::operator /( const floatInVec &scalar ) const
-{
-    return Vector3( _mm_div_ps( mVec128, scalar.get128() ) );
-}
-
-inline Vector3 & Vector3::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator /=( const floatInVec &scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-	return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
-}
-
-inline const Vector3 operator *( float scalar, const Vector3 &vec )
-{
-    return floatInVec(scalar) * vec;
-}
-
-inline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 )
-{
-    return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 )
-{
-    return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 recipPerElem( const Vector3 &vec )
-{
-    return Vector3( _mm_rcp_ps( vec.get128() ) );
-}
-
-inline const Vector3 absPerElem( const Vector3 &vec )
-{
-    return Vector3( fabsf4( vec.get128() ) );
-}
-
-inline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 )
-{
-	__m128 vmask = toM128(0x7fffffff);
-	return Vector3( _mm_or_ps(
-		_mm_and_ps   ( vmask, vec0.get128() ),			// Value
-		_mm_andnot_ps( vmask, vec1.get128() ) ) );		// Signs
-}
-
-inline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 )
-{
-    return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec maxElem( const Vector3 &vec )
-{
-    return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
-}
-
-inline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 )
-{
-    return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec minElem( const Vector3 &vec )
-{
-    return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
-}
-
-inline const floatInVec sum( const Vector3 &vec )
-{
-    return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
-}
-
-inline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 )
-{
-    return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline const floatInVec lengthSqr( const Vector3 &vec )
-{
-    return floatInVec(  _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
-}
-
-inline const floatInVec length( const Vector3 &vec )
-{
-    return floatInVec(  _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
-}
-
-inline const Vector3 normalize( const Vector3 &vec )
-{
-    return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
-}
-
-inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 )
-{
-    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 )
-{
-    return select( vec0, vec1, boolInVec(select1) );
-}
-
-inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 )
-{
-	return Vector3(vec_sel( vec0.get128(), vec1.get128(), select1.get128() ));
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector3 &vec )
-{
-    union { __m128 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( const Vector3 &vec, const char * name )
-{
-    union { __m128 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-inline Vector4::Vector4( float _x, float _y, float _z, float _w )
-{
-    mVec128 = _mm_setr_ps(_x, _y, _z, _w); 
- }
-
-inline Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
-{
-	mVec128 = _mm_unpacklo_ps(
-		_mm_unpacklo_ps( _x.get128(), _z.get128() ),
-		_mm_unpacklo_ps( _y.get128(), _w.get128() ) );
-}
-
-inline Vector4::Vector4( const Vector3 &xyz, float _w )
-{
-    mVec128 = xyz.get128();
-    _vmathVfSetElement(mVec128, _w, 3);
-}
-
-inline Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w )
-{
-    mVec128 = xyz.get128();
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-}
-
-inline Vector4::Vector4( const Vector3 &vec )
-{
-    mVec128 = vec.get128();
-    mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3);
-}
-
-inline Vector4::Vector4( const Point3 &pnt )
-{
-    mVec128 = pnt.get128();
-    mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3);
-}
-
-inline Vector4::Vector4( const Quat &quat )
-{
-    mVec128 = quat.get128();
-}
-
-inline Vector4::Vector4( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Vector4::Vector4( const floatInVec &scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Vector4::Vector4( __m128 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 )
-{
-    return lerp( floatInVec(t), vec0, vec1 );
-}
-
-inline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
-{
-    return slerp( floatInVec(t), unitVec0, unitVec1 );
-}
-
-inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
-{
-    __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
-    __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
-    angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
-    angles = _mm_unpacklo_ps( angles, oneMinusT );		// angles = 1, 1-t, t, 1-t
-    angles = _mm_mul_ps( angles, angle );
-    sines = sinf4( angles );
-    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
-}
-
-inline __m128 Vector4::get128( ) const
-{
-    return mVec128;
-}
-/*
-inline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
-}
-*/
-inline Vector4 & Vector4::operator =( const Vector4 &vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( const Vector3 &vec )
-{
-	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff};
-	mVec128 = vec_sel( vec.get128(), mVec128, sw );
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Vector4 & Vector4::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Vector4 & Vector4::setX( const floatInVec &_x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Vector4::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Vector4 & Vector4::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Vector4 & Vector4::setY( const floatInVec &_y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Vector4::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Vector4 & Vector4::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Vector4 & Vector4::setZ( const floatInVec &_z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Vector4::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Vector4 & Vector4::setW( float _w )
-{
-    _vmathVfSetElement(mVec128, _w, 3);
-    return *this;
-}
-
-inline Vector4 & Vector4::setW( const floatInVec &_w )
-{
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-    return *this;
-}
-
-inline const floatInVec Vector4::getW( ) const
-{
-    return floatInVec( mVec128, 3 );
-}
-
-inline Vector4 & Vector4::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Vector4 & Vector4::setElem( int idx, const floatInVec &value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Vector4::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Vector4::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Vector4::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector4 Vector4::operator +( const Vector4 &vec ) const
-{
-    return Vector4( _mm_add_ps( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator -( const Vector4 &vec ) const
-{
-    return Vector4( _mm_sub_ps( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Vector4 Vector4::operator *( const floatInVec &scalar ) const
-{
-    return Vector4( _mm_mul_ps( mVec128, scalar.get128() ) );
-}
-
-inline Vector4 & Vector4::operator +=( const Vector4 &vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( const Vector4 &vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( const floatInVec &scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Vector4 Vector4::operator /( const floatInVec &scalar ) const
-{
-    return Vector4( _mm_div_ps( mVec128, scalar.get128() ) );
-}
-
-inline Vector4 & Vector4::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator /=( const floatInVec &scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-	return Vector4(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
-}
-
-inline const Vector4 operator *( float scalar, const Vector4 &vec )
-{
-    return floatInVec(scalar) * vec;
-}
-
-inline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 )
-{
-    return Vector4( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 )
-{
-    return Vector4( _mm_div_ps( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 recipPerElem( const Vector4 &vec )
-{
-    return Vector4( _mm_rcp_ps( vec.get128() ) );
-}
-
-inline const Vector4 absPerElem( const Vector4 &vec )
-{
-    return Vector4( fabsf4( vec.get128() ) );
-}
-
-inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 )
-{
-	__m128 vmask = toM128(0x7fffffff);
-	return Vector4( _mm_or_ps(
-		_mm_and_ps   ( vmask, vec0.get128() ),			// Value
-		_mm_andnot_ps( vmask, vec1.get128() ) ) );		// Signs
-}
-
-inline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 )
-{
-    return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec maxElem( const Vector4 &vec )
-{
-    return floatInVec( _mm_max_ps(
-		_mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
-		_mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
-}
-
-inline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 )
-{
-    return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec minElem( const Vector4 &vec )
-{
-    return floatInVec( _mm_min_ps(
-		_mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
-		_mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
-}
-
-inline const floatInVec sum( const Vector4 &vec )
-{
-    return floatInVec( _mm_add_ps(
-		_mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
-		_mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
-}
-
-inline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 )
-{
-    return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline const floatInVec lengthSqr( const Vector4 &vec )
-{
-    return floatInVec(  _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
-}
-
-inline const floatInVec length( const Vector4 &vec )
-{
-    return floatInVec(  _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
-}
-
-inline const Vector4 normalize( const Vector4 &vec )
-{
-    return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
-}
-
-inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 )
-{
-    return select( vec0, vec1, boolInVec(select1) );
-}
-
-inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 )
-{
-    return Vector4( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector4 &vec )
-{
-    union { __m128 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( const Vector4 &vec, const char * name )
-{
-    union { __m128 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-inline Point3::Point3( float _x, float _y, float _z )
-{
-    mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
-}
-
-inline Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
-{
-	mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() );
-}
-
-inline Point3::Point3( const Vector3 &vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Point3::Point3( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Point3::Point3( const floatInVec &scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Point3::Point3( __m128 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 )
-{
-    return lerp( floatInVec(t), pnt0, pnt1 );
-}
-
-inline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline __m128 Point3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( const Point3 &pnt, __m128 * quad )
-{
-    __m128 dstVec = *quad;
-	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
-    dstVec = vec_sel(pnt.get128(), dstVec, sw);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads )
-{
-	const float *quads = (float *)threeQuads;
-    pnt0 = Point3(  _mm_load_ps(quads) );
-    pnt1 = Point3( _mm_loadu_ps(quads + 3) );
-    pnt2 = Point3( _mm_loadu_ps(quads + 6) );
-    pnt3 = Point3( _mm_loadu_ps(quads + 9) );
-}
-
-inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads )
-{
-	__m128 xxxx = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
-	__m128 zzzz = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
-	__declspec(align(16)) unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
-	__declspec(align(16)) unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
-	threeQuads[0] = vec_sel( pnt0.get128(), xxxx, xsw );
-    threeQuads[1] = _mm_shuffle_ps( pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
-    threeQuads[2] = vec_sel( _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
-}
-/*
-inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads )
-{
-#if 0
-    __m128 xyz0[3];
-    __m128 xyz1[3];
-    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-#else
-	assert(0);
-#endif
-}
-*/
-inline Point3 & Point3::operator =( const Point3 &pnt )
-{
-    mVec128 = pnt.mVec128;
-    return *this;
-}
-
-inline Point3 & Point3::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Point3 & Point3::setX( const floatInVec &_x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Point3::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Point3 & Point3::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Point3 & Point3::setY( const floatInVec &_y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Point3::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Point3 & Point3::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Point3 & Point3::setZ( const floatInVec &_z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Point3::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Point3 & Point3::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Point3 & Point3::setElem( int idx, const floatInVec &value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Point3::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Point3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Point3::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector3 Point3::operator -( const Point3 &pnt ) const
-{
-    return Vector3( _mm_sub_ps( mVec128, pnt.mVec128 ) );
-}
-
-inline const Point3 Point3::operator +( const Vector3 &vec ) const
-{
-    return Point3( _mm_add_ps( mVec128, vec.get128() ) );
-}
-
-inline const Point3 Point3::operator -( const Vector3 &vec ) const
-{
-    return Point3( _mm_sub_ps( mVec128, vec.get128() ) );
-}
-
-inline Point3 & Point3::operator +=( const Vector3 &vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( const Vector3 &vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 )
-{
-    return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 )
-{
-    return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 recipPerElem( const Point3 &pnt )
-{
-    return Point3( _mm_rcp_ps( pnt.get128() ) );
-}
-
-inline const Point3 absPerElem( const Point3 &pnt )
-{
-    return Point3( fabsf4( pnt.get128() ) );
-}
-
-inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 )
-{
-	__m128 vmask = toM128(0x7fffffff);
-	return Point3( _mm_or_ps(
-		_mm_and_ps   ( vmask, pnt0.get128() ),			// Value
-		_mm_andnot_ps( vmask, pnt1.get128() ) ) );		// Signs
-}
-
-inline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 )
-{
-    return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const floatInVec maxElem( const Point3 &pnt )
-{
-    return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
-}
-
-inline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 )
-{
-    return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const floatInVec minElem( const Point3 &pnt )
-{
-    return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
-}
-
-inline const floatInVec sum( const Point3 &pnt )
-{
-    return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
-}
-
-inline const Point3 scale( const Point3 &pnt, float scaleVal )
-{
-    return scale( pnt, floatInVec( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec )
-{
-    return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
-}
-
-inline const floatInVec distSqrFromOrigin( const Point3 &pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline const floatInVec distFromOrigin( const Point3 &pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 )
-{
-    return select( pnt0, pnt1, boolInVec(select1) );
-}
-
-inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 )
-{
-    return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Point3 &pnt )
-{
-    union { __m128 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( const Point3 &pnt, const char * name )
-{
-    union { __m128 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Constants
+// for permutes words are labeled [x,y,z,w] [a,b,c,d]
+
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
+#define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f // slerp compares the dot product of its two inputs against this value to build its select mask
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+static inline __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 )
+{   // 3-component dot product: lane-wise multiply, then sum of lanes 0, 1 and 2 (lane 3 is not used). Presumably vec_splat broadcasts one lane -- confirm.
+    __m128 result = _mm_mul_ps( vec0, vec1);
+    return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) );
+}
+
+static inline __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 )
+{   // 4-component dot product; each shuffle broadcasts one product lane, so the sum is returned in all four lanes.
+    __m128 result = _mm_mul_ps(vec0, vec1);
+	return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0,0,0,0)),
+			_mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(1,1,1,1)),
+			_mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2,2,2,2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3,3,3,3)))));
+}
+
+static inline __m128 _vmathVfCross( __m128 vec0, __m128 vec1 )
+{   // Cross product of the xyz lanes, built from two rotated copies of each operand.
+    __m128 tmp0, tmp1, tmp2, tmp3, result;
+    tmp0 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) ); // vec0 as (y, z, x, w)
+    tmp1 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) ); // vec1 as (z, x, y, w)
+    tmp2 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) ); // vec0 as (z, x, y, w)
+    tmp3 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) ); // vec1 as (y, z, x, w)
+    result = vec_mul( tmp0, tmp1 );
+    result = vec_nmsub( tmp2, tmp3, result ); // presumably result - tmp2 * tmp3 -- confirm against the vec_nmsub definition
+    return result;
+}
+/*
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v)
+{
+#if 0
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u);
+    const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu);
+    const vec_uint4 mergeSign = (vec_uint4)(0x00008000u);
+
+    sign = vec_sr((vec_uint4)v, (vec_uint4)16);
+    mant = vec_sr((vec_uint4)v, (vec_uint4)13);
+    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff);
+
+    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112);
+    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142);
+
+    bexp = _mm_add_ps(bexp, (vec_int4)-112);
+    bexp = vec_sl(bexp, (vec_uint4)10);
+
+    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
+    hfloat = vec_sel((vec_uint4)(0), hfloat, notZero);
+    hfloat = vec_sel(hfloat, hfloatInf, isInf);
+    hfloat = vec_sel(hfloat, sign, mergeSign);
+
+    return hfloat;
+#else
+	assert(0);
+	return _mm_setzero_ps();
+#endif
+}
+
+static inline vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v)
+{
+#if 0
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
+#else
+	assert(0);
+	return _mm_setzero_si128();
+#endif
+}
+*/
+
+static inline __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
+{   // Returns dst with element `slot` replaced by element `slot` of src (same index on both sides); slot is not range-checked.
+	SSEFloat s;   // NOTE(review): SSEFloat is declared elsewhere; it is used here as an __m128 (m128) overlaid with a float array (f)
+	s.m128 = src;
+	SSEFloat d;
+	d.m128 = dst;
+	d.f[slot] = s.f[slot];
+	return d.m128;
+}
+
+#define _vmathVfSetElement(vec, scalar, slot) (((float *)&(vec))[(slot)] = (scalar)) // writes element `slot` of `vec` in place; fully parenthesized so the expansion is safe inside larger expressions
+
+static inline __m128 _vmathVfSplatScalar(float scalar)
+{   // Broadcasts scalar into all four lanes.
+	return _mm_set1_ps(scalar);
+}
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline VecIdx::operator floatInVec() const
+{   // With scalar casts disabled, the proxy converts to floatInVec built from (ref, i).
+    return floatInVec(ref, i);
+}
+
+inline float VecIdx::getAsFloat() const
+#else
+inline VecIdx::operator float() const
+#endif
+{   // Shared body: getAsFloat() when _VECTORMATH_NO_SCALAR_CAST is defined, operator float() otherwise. Reads element i of ref.
+    return ((float *)&ref)[i];
+}
+
+inline float VecIdx::operator =( float scalar )
+{   // Writes scalar into element i of the referenced vector and returns the value written.
+    _vmathVfSetElement(ref, scalar, i);
+    return scalar;
+}
+
+inline floatInVec VecIdx::operator =( const floatInVec &scalar )
+{   // Copies element i of scalar's 128-bit value into element i of the referenced vector; returns scalar.
+    ref = _vmathVfInsert(ref, scalar.get128(), i);
+    return scalar;
+}
+
+inline floatInVec VecIdx::operator =( const VecIdx& scalar )
+{   // Builds a floatInVec from the other proxy's (ref, i) and assigns it through the floatInVec overload.
+    return *this = floatInVec(scalar.ref, scalar.i);
+}
+
+inline floatInVec VecIdx::operator *=( float scalar )
+{   // Forwards to the floatInVec overload.
+    return *this *= floatInVec(scalar);
+}
+
+inline floatInVec VecIdx::operator *=( const floatInVec &scalar )
+{   // Multiplies the referenced element by scalar and stores the product back through operator =.
+    return *this = floatInVec(ref, i) * scalar;
+}
+
+inline floatInVec VecIdx::operator /=( float scalar )
+{   // Forwards to the floatInVec overload.
+    return *this /= floatInVec(scalar);
+}
+
+inline floatInVec VecIdx::operator /=( const floatInVec &scalar )
+{   // Divides the referenced element by scalar and stores the quotient back through operator =; no zero check is made here.
+    return *this = floatInVec(ref, i) / scalar;
+}
+
+inline floatInVec VecIdx::operator +=( float scalar )
+{   // Forwards to the floatInVec overload.
+    return *this += floatInVec(scalar);
+}
+
+inline floatInVec VecIdx::operator +=( const floatInVec &scalar )
+{   // Adds scalar to the referenced element and stores the sum back through operator =.
+    return *this = floatInVec(ref, i) + scalar;
+}
+
+inline floatInVec VecIdx::operator -=( float scalar )
+{   // Forwards to the floatInVec overload.
+    return *this -= floatInVec(scalar);
+}
+
+inline floatInVec VecIdx::operator -=( const floatInVec &scalar )
+{   // Subtracts scalar from the referenced element and stores the difference back through operator =.
+    return *this = floatInVec(ref, i) - scalar;
+}
+
+inline Vector3::Vector3( float _x, float _y, float _z )
+{   // Lanes 0..2 take x, y, z; lane 3 is set to 0.0f.
+    mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
+}
+
+inline Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
+{   // Two interleaves assemble (x, y, z, *); lane 3 ends up holding lane 1 of _y rather than zero.
+	__m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() ); // (x0, z0, x1, z1)
+	mVec128 = _mm_unpacklo_ps( xz, _y.get128() );            // (x0, y0, z0, y1)
+}
+
+inline Vector3::Vector3( const Point3 &pnt )
+{   // Copies the point's 128-bit value unchanged, including whatever lane 3 holds.
+    mVec128 = pnt.get128();
+}
+
+inline Vector3::Vector3( float scalar )
+{   // Takes the 128-bit value of floatInVec(scalar); presumably scalar broadcast to every lane -- confirm in floatInVec.
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Vector3::Vector3( const floatInVec &scalar )
+{   // Uses scalar's 128-bit value directly for all four lanes.
+    mVec128 = scalar.get128();
+}
+
+inline Vector3::Vector3( __m128 vf4 )
+{   // Wraps a raw SSE register as-is; lane 3 is kept, not cleared.
+    mVec128 = vf4;
+}
+
+inline const Vector3 Vector3::xAxis( )
+{   // Returns (1, 0, 0) with lane 3 = 0.
+    return Vector3( _VECTORMATH_UNIT_1000 );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{   // Returns (0, 1, 0) with lane 3 = 0.
+    return Vector3( _VECTORMATH_UNIT_0100 );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{   // Returns (0, 0, 1) with lane 3 = 0.
+    return Vector3( _VECTORMATH_UNIT_0010 );
+}
+
+inline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 )
+{   // Forwards to the floatInVec overload.
+    return lerp( floatInVec(t), vec0, vec1 );
+}
+
+inline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 )
+{   // Linear interpolation vec0 + (vec1 - vec0) * t; t is not clamped, so values outside [0, 1] extrapolate.
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
+{   // Forwards to the floatInVec overload.
+    return slerp( floatInVec(t), unitVec0, unitVec1 );
+}
+
+inline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
+{   // Spherical interpolation between two inputs that the parameter names declare to be unit length; t is not clamped.
+    __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); // lanes set where cosAngle < tolerance
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
+    angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
+    angles = _mm_unpacklo_ps( angles, oneMinusT );		// angles = 1, 1-t, t, 1-t
+    angles = _mm_mul_ps( angles, angle );
+    sines = sinf4( angles );
+    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) ); // every sine divided by lane 0, which holds the sine of the full angle
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); // presumably the sine weight where the mask is set, else linear 1-t -- confirm vec_sel argument order
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
+}
+
+inline __m128 Vector3::get128( ) const
+{   // Returns the underlying 128-bit value by copy.
+    return mVec128;
+}
+
+inline void storeXYZ( const Vector3 &vec, __m128 * quad )
+{   // Read-modify-write of *quad. The mask has only lane 3 set; presumably vec_sel then keeps quad's lane 3 and takes xyz from vec -- confirm vec_sel semantics.
+    __m128 dstVec = *quad;
+	__declspec(align(16)) unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
+    dstVec = vec_sel(vec.get128(), dstVec, sw);
+    *quad = dstVec;
+}
+
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads )
+{   // Unpacks four xyz triples stored back to back in three quads (12 floats). Lane 3 of each output is filler taken from the neighbouring data.
+	const float *quads = (const float *)threeQuads;
+    vec0 = Vector3(  _mm_load_ps(quads) );          // aligned load: threeQuads points at __m128 data
+    vec1 = Vector3( _mm_loadu_ps(quads + 3) );
+    vec2 = Vector3( _mm_loadu_ps(quads + 6) );
+    vec3 = Vector3( _mm_shuffle_ps( threeQuads[2], threeQuads[2], _MM_SHUFFLE(0, 3, 2, 1) ) ); // rotate (z2,x3,y3,z3) -> (x3,y3,z3,z2); a 4-float load at quads + 9 would read one float past the buffer
+}
+
+inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads )
+{   // Packs four xyz triples into three quads (12 floats), dropping lane 3 of every input. Lane notes assume vec_sel(a, b, mask) takes b where mask is set -- confirm.
+	__m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) ); // vec1.x in every lane
+	__m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) ); // vec2.z in every lane
+	__declspec(align(16)) unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
+	__declspec(align(16)) unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
+	threeQuads[0] = vec_sel( vec0.get128(), xxxx, xsw ); // (x0, y0, z0, x1)
+    threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) ); // (y1, z1, x2, y2)
+    threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw ); // (z2, x3, y3, z3)
+}
+/*
+inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads )
+{
+	assert(0);
+#if 0
+    __m128 xyz0[3];
+    __m128 xyz1[3];
+    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
+    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+#endif
+}
+*/
+inline Vector3 & Vector3::operator =( const Vector3 &vec )
+{   // Copies all four lanes from vec; returns *this.
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( float _x )
+{   // Overwrites element 0 in place; returns *this for chaining.
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( const floatInVec &_x )
+{   // Copies element 0 of _x's 128-bit value into element 0; returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Vector3::getX( ) const
+{   // Builds a floatInVec from (mVec128, slot 0); presumably that constructor extracts the x element -- confirm in floatInVec.
+    return floatInVec( mVec128, 0 );
+}
+
+inline Vector3 & Vector3::setY( float _y )
+{   // Overwrites element 1 in place; returns *this for chaining.
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Vector3 & Vector3::setY( const floatInVec &_y )
+{   // Copies element 1 of _y's 128-bit value into element 1; returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Vector3::getY( ) const
+{   // Builds a floatInVec from (mVec128, slot 1); presumably that constructor extracts the y element -- confirm in floatInVec.
+    return floatInVec( mVec128, 1 );
+}
+
+inline Vector3 & Vector3::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Vector3 & Vector3::setZ( const floatInVec &_z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Vector3::getZ( ) const
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Vector3 & Vector3::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Vector3 & Vector3::setElem( int idx, const floatInVec &value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Vector3::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Vector3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Vector3::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+// Lane-wise sum of this vector and vec.
+inline const Vector3 Vector3::operator +( const Vector3 &vec ) const
+{
+    const __m128 total = _mm_add_ps( mVec128, vec.mVec128 );
+    return Vector3( total );
+}
+
+inline const Vector3 Vector3::operator -( const Vector3 &vec ) const
+{
+    return Vector3( _mm_sub_ps( mVec128, vec.mVec128 ) );
+}
+
+// Lane-wise sum of this vector and a point; the result is returned as a Point3.
+inline const Point3 Vector3::operator +( const Point3 &pnt ) const
+{
+    const __m128 moved = _mm_add_ps( mVec128, pnt.get128() );
+    return Point3( moved );
+}
+
+// Scalar multiply: wraps the float in a floatInVec and forwards to the floatInVec overload.
+inline const Vector3 Vector3::operator *( float scalar ) const
+{
+    const floatInVec scalarVec( scalar );
+    return *this * scalarVec;
+}
+
+inline const Vector3 Vector3::operator *( const floatInVec &scalar ) const
+{
+    return Vector3( _mm_mul_ps( mVec128, scalar.get128() ) );
+}
+
+// In-place add; built on operator + and the assignment operator, returns *this.
+inline Vector3 & Vector3::operator +=( const Vector3 &vec )
+{
+    return ( *this = *this + vec );
+}
+
+// In-place subtract; built on binary operator - and the assignment operator, returns *this.
+inline Vector3 & Vector3::operator -=( const Vector3 &vec )
+{
+    return ( *this = *this - vec );
+}
+
+// In-place scalar multiply; built on operator *( float ), returns *this.
+inline Vector3 & Vector3::operator *=( float scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+inline Vector3 & Vector3::operator *=( const floatInVec &scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+// Scalar divide: wraps the float in a floatInVec and forwards to the floatInVec overload.
+inline const Vector3 Vector3::operator /( float scalar ) const
+{
+    const floatInVec divisor( scalar );
+    return *this / divisor;
+}
+
+inline const Vector3 Vector3::operator /( const floatInVec &scalar ) const
+{
+    return Vector3( _mm_div_ps( mVec128, scalar.get128() ) );
+}
+
+// In-place scalar divide; built on operator /( float ), returns *this.
+inline Vector3 & Vector3::operator /=( float scalar )
+{
+    return ( *this = *this / scalar );
+}
+
+inline Vector3 & Vector3::operator /=( const floatInVec &scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+// Negation, computed as (0 - v) in every lane.
+inline const Vector3 Vector3::operator -( ) const
+{
+    const __m128 zero = _mm_setzero_ps();
+    return Vector3( _mm_sub_ps( zero, mVec128 ) );
+}
+
+// scalar * vector: wraps the float in a floatInVec and forwards to the floatInVec overload.
+inline const Vector3 operator *( float scalar, const Vector3 &vec )
+{
+    const floatInVec scalarVec( scalar );
+    return scalarVec * vec;
+}
+
+inline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) );
+}
+
+// Per-element reciprocal of vec.
+// NOTE: _mm_rcp_ps is a reduced-precision approximation of 1/x, not an exact division.
+inline const Vector3 recipPerElem( const Vector3 &vec )
+{
+    const __m128 approxRecip = _mm_rcp_ps( vec.get128() );
+    return Vector3( approxRecip );
+}
+
+inline const Vector3 absPerElem( const Vector3 &vec )
+{
+    return Vector3( fabsf4( vec.get128() ) );
+}
+
+// Combine the non-sign bits of vec0 with the remaining (sign) bits of vec1, per element.
+// NOTE(review): assumes toM128(0x7fffffff) replicates that bit pattern into every lane -- confirm.
+inline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 nonSignBits = toM128(0x7fffffff);
+    const __m128 magnitude = _mm_and_ps( nonSignBits, vec0.get128() );     // value bits of vec0
+    const __m128 signOnly = _mm_andnot_ps( nonSignBits, vec1.get128() );   // ~mask & vec1
+    return Vector3( _mm_or_ps( magnitude, signOnly ) );
+}
+
+inline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) );
+}
+
+// Largest of elements 0, 1 and 2 (element 3 is ignored), evaluated as max( max( x, y ), z ).
+inline const floatInVec maxElem( const Vector3 &vec )
+{
+    const __m128 maxXY = _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    return floatInVec( _mm_max_ps( maxXY, vec_splat( vec.get128(), 2 ) ) );
+}
+
+inline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) );
+}
+
+// Smallest of elements 0, 1 and 2 (element 3 is ignored), evaluated as min( min( x, y ), z ).
+inline const floatInVec minElem( const Vector3 &vec )
+{
+    const __m128 minXY = _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    return floatInVec( _mm_min_ps( minXY, vec_splat( vec.get128(), 2 ) ) );
+}
+
+// Sum of elements 0, 1 and 2 (element 3 is ignored), evaluated as ( x + y ) + z.
+inline const floatInVec sum( const Vector3 &vec )
+{
+    const __m128 xPlusY = _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    return floatInVec( _mm_add_ps( xPlusY, vec_splat( vec.get128(), 2 ) ) );
+}
+
+// 3-D dot product; the scalar is taken from element 0 of the _vmathVfDot3 result.
+inline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 product = _vmathVfDot3( vec0.get128(), vec1.get128() );
+    return floatInVec( product, 0 );
+}
+
+// Squared length: the 3-D dot product of vec with itself, read from element 0.
+inline const floatInVec lengthSqr( const Vector3 &vec )
+{
+    const __m128 selfDot = _vmathVfDot3( vec.get128(), vec.get128() );
+    return floatInVec( selfDot, 0 );
+}
+
+// Length: square root (_mm_sqrt_ps) of the 3-D self dot product, read from element 0.
+inline const floatInVec length( const Vector3 &vec )
+{
+    const __m128 selfDot = _vmathVfDot3( vec.get128(), vec.get128() );
+    const __m128 root = _mm_sqrt_ps( selfDot );
+    return floatInVec( root, 0 );
+}
+
+// Scale vec by the reciprocal square root of its 3-D squared length.
+// NOTE: _mm_rsqrt_ps is a reduced-precision approximation, so the result is only approximately
+// unit length, and there is no guard for a zero-length input.
+// NOTE(review): relies on _vmathVfDot3 placing the dot product in every lane that is used -- confirm.
+inline const Vector3 normalize( const Vector3 &vec )
+{
+    const __m128 lenSqr = _vmathVfDot3( vec.get128(), vec.get128() );
+    const __m128 invLen = _mm_rsqrt_ps( lenSqr );
+    return Vector3( _mm_mul_ps( vec.get128(), invLen ) );
+}
+
+inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
+}
+
+// bool convenience overload: wraps select1 in a boolInVec and forwards to the boolInVec overload.
+inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 )
+{
+    const boolInVec chooseSecond( select1 );
+    return select( vec0, vec1, chooseSecond );
+}
+
+inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 )
+{
+	return Vector3(vec_sel( vec0.get128(), vec1.get128(), select1.get128() ));
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: print elements 0..2 of vec as "( x y z )" followed by a newline.
+inline void print( const Vector3 &vec )
+{
+    // view the quad as four floats so that single lanes can be passed to printf
+    union { __m128 quad; float lane[4]; } view;
+    view.quad = vec.get128();
+    printf( "( %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2] );
+}
+
+// Debug helper: print "name: ( x y z )" using elements 0..2 of vec, followed by a newline.
+inline void print( const Vector3 &vec, const char * name )
+{
+    // view the quad as four floats so that single lanes can be passed to printf
+    union { __m128 quad; float lane[4]; } view;
+    view.quad = vec.get128();
+    printf( "%s: ( %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2] );
+}
+
+#endif
+
+// Build a Vector4 whose elements 0..3 are _x, _y, _z, _w (in that order).
+inline Vector4::Vector4( float _x, float _y, float _z, float _w )
+    : mVec128( _mm_setr_ps( _x, _y, _z, _w ) )
+{
+}
+
+// Gather element 0 of four floatInVec values into one quad.
+// _mm_unpacklo_ps interleaves the low two lanes of its operands, so interleaving
+// (x0 z0 ..) with (y0 w0 ..) produces x0 y0 z0 w0.
+inline Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
+{
+    const __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() );
+    const __m128 yw = _mm_unpacklo_ps( _y.get128(), _w.get128() );
+    mVec128 = _mm_unpacklo_ps( xz, yw );
+}
+
+// Build a Vector4 from a Vector3's quad and a float for element 3.
+inline Vector4::Vector4( const Vector3 &xyz, float _w )
+{
+    mVec128 = xyz.get128();
+    _vmathVfSetElement(mVec128, _w, 3); // helper defined outside this chunk; presumably stores _w into element 3 in place -- confirm
+}
+
+// Build a Vector4 from a Vector3's quad with element 3 replaced by _w (via _vmathVfInsert).
+inline Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w )
+{
+    const __m128 xyzQuad = xyz.get128();
+    mVec128 = _vmathVfInsert( xyzQuad, _w.get128(), 3 );
+}
+
+// Build a Vector4 from a Vector3's quad with element 3 replaced by 0 (via _vmathVfInsert).
+inline Vector4::Vector4( const Vector3 &vec )
+{
+    const __m128 zero = _mm_setzero_ps();
+    mVec128 = _vmathVfInsert( vec.get128(), zero, 3 );
+}
+
+// Build a Vector4 from a Point3's quad with element 3 replaced by 1 (via _vmathVfInsert).
+inline Vector4::Vector4( const Point3 &pnt )
+{
+    const __m128 one = _mm_set1_ps(1.0f);
+    mVec128 = _vmathVfInsert( pnt.get128(), one, 3 );
+}
+
+inline Vector4::Vector4( const Quat &quat )
+{
+    mVec128 = quat.get128();
+}
+
+inline Vector4::Vector4( float scalar )
+{
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Vector4::Vector4( const floatInVec &scalar )
+{
+    mVec128 = scalar.get128();
+}
+
+inline Vector4::Vector4( __m128 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Vector4 Vector4::xAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_1000 );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0100 );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0010 );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0001 );
+}
+
+inline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return lerp( floatInVec(t), vec0, vec1 );
+}
+
+// Linear interpolation: vec0 + ( vec1 - vec0 ) * t.
+inline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 )
+{
+    const Vector4 delta = vec1 - vec0;
+    return vec0 + ( delta * t );
+}
+
+inline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
+{
+    return slerp( floatInVec(t), unitVec0, unitVec1 );
+}
+
+// Spherical interpolation between unitVec0 and unitVec1 with parameter t.
+// The result is unitVec0 * scale0 + unitVec1 * scale1. In lanes where
+// _VECTORMATH_SLERP_TOL > cos(angle) the weights are the sine ratios
+// sin((1-t)*angle)/sin(angle) and sin(t*angle)/sin(angle); otherwise they are the plain
+// linear weights (1-t) and t.
+// NOTE(review): the above assumes vec_sel( a, b, mask ) yields b where mask bits are set,
+// vec_madd( a, b, c ) is a*b + c, and t / the dot product are replicated across lanes -- confirm.
+inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
+{
+    __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    // cosine of the angle between the inputs (4-D dot product)
+    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
+    // all-ones in lanes where the tolerance exceeds cosAngle
+    __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
+    // build the multipliers (1, 1-t, t, 1-t) so that a single sinf4 call evaluates every sine needed
+    angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
+    angles = _mm_unpacklo_ps( angles, oneMinusT );		// angles = 1, 1-t, t, 1-t
+    angles = _mm_mul_ps( angles, angle );
+    sines = sinf4( angles );
+    // divide every sine by element 0, i.e. by sin(angle)
+    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
+    // element 1 holds the (1-t) ratio, element 2 the t ratio
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
+}
+
+inline __m128 Vector4::get128( ) const
+{
+    return mVec128;
+}
+/*
+inline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads )
+{
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
+}
+*/
+inline Vector4 & Vector4::operator =( const Vector4 &vec )
+{
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+// Replace elements 0..2 with those of vec while keeping the current element 3.
+// NOTE(review): assumes vec_sel takes the second operand where mask bits are set -- confirm.
+inline Vector4 & Vector4::setXYZ( const Vector3 &vec )
+{
+    __declspec(align(16)) unsigned int keepLastLane[4] = {0, 0, 0, 0xffffffff};
+    const __m128 merged = vec_sel( vec.get128(), mVec128, keepLastLane );
+    mVec128 = merged;
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{
+    return Vector3( mVec128 );
+}
+
+inline Vector4 & Vector4::setX( float _x )
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Vector4 & Vector4::setX( const floatInVec &_x )
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Vector4::getX( ) const
+{
+    return floatInVec( mVec128, 0 );
+}
+
+inline Vector4 & Vector4::setY( float _y )
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Vector4 & Vector4::setY( const floatInVec &_y )
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Vector4::getY( ) const
+{
+    return floatInVec( mVec128, 1 );
+}
+
+inline Vector4 & Vector4::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Vector4 & Vector4::setZ( const floatInVec &_z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Vector4::getZ( ) const
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Vector4 & Vector4::setW( float _w )
+{
+    _vmathVfSetElement(mVec128, _w, 3);
+    return *this;
+}
+
+inline Vector4 & Vector4::setW( const floatInVec &_w )
+{
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+    return *this;
+}
+
+inline const floatInVec Vector4::getW( ) const
+{
+    return floatInVec( mVec128, 3 );
+}
+
+inline Vector4 & Vector4::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Vector4 & Vector4::setElem( int idx, const floatInVec &value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Vector4::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Vector4::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Vector4::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline const Vector4 Vector4::operator +( const Vector4 &vec ) const
+{
+    return Vector4( _mm_add_ps( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator -( const Vector4 &vec ) const
+{
+    return Vector4( _mm_sub_ps( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator *( float scalar ) const
+{
+    return *this * floatInVec(scalar);
+}
+
+inline const Vector4 Vector4::operator *( const floatInVec &scalar ) const
+{
+    return Vector4( _mm_mul_ps( mVec128, scalar.get128() ) );
+}
+
+// In-place add; built on operator + and the assignment operator, returns *this.
+inline Vector4 & Vector4::operator +=( const Vector4 &vec )
+{
+    return ( *this = *this + vec );
+}
+
+inline Vector4 & Vector4::operator -=( const Vector4 &vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( const floatInVec &scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator /( float scalar ) const
+{
+    return *this / floatInVec(scalar);
+}
+
+inline const Vector4 Vector4::operator /( const floatInVec &scalar ) const
+{
+    return Vector4( _mm_div_ps( mVec128, scalar.get128() ) );
+}
+
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator /=( const floatInVec &scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator -( ) const
+{
+	return Vector4(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
+}
+
+inline const Vector4 operator *( float scalar, const Vector4 &vec )
+{
+    return floatInVec(scalar) * vec;
+}
+
+inline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return Vector4( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return Vector4( _mm_div_ps( vec0.get128(), vec1.get128() ) );
+}
+
+// Per-element reciprocal of vec.
+// NOTE: _mm_rcp_ps is a reduced-precision approximation of 1/x, not an exact division.
+inline const Vector4 recipPerElem( const Vector4 &vec )
+{
+    const __m128 approxRecip = _mm_rcp_ps( vec.get128() );
+    return Vector4( approxRecip );
+}
+
+inline const Vector4 absPerElem( const Vector4 &vec )
+{
+    return Vector4( fabsf4( vec.get128() ) );
+}
+
+// Combine the non-sign bits of vec0 with the remaining (sign) bits of vec1, per element.
+// NOTE(review): assumes toM128(0x7fffffff) replicates that bit pattern into every lane -- confirm.
+inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    const __m128 nonSignBits = toM128(0x7fffffff);
+    const __m128 magnitude = _mm_and_ps( nonSignBits, vec0.get128() );     // value bits of vec0
+    const __m128 signOnly = _mm_andnot_ps( nonSignBits, vec1.get128() );   // ~mask & vec1
+    return Vector4( _mm_or_ps( magnitude, signOnly ) );
+}
+
+inline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) );
+}
+
+// Largest of the four elements, evaluated as max( max( x, y ), max( z, w ) ).
+inline const floatInVec maxElem( const Vector4 &vec )
+{
+    const __m128 maxXY = _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 maxZW = _mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) );
+    return floatInVec( _mm_max_ps( maxXY, maxZW ) );
+}
+
+inline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) );
+}
+
+// Smallest of the four elements, evaluated as min( min( x, y ), min( z, w ) ).
+inline const floatInVec minElem( const Vector4 &vec )
+{
+    const __m128 minXY = _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 minZW = _mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) );
+    return floatInVec( _mm_min_ps( minXY, minZW ) );
+}
+
+// Sum of the four elements, evaluated as ( x + y ) + ( z + w ).
+inline const floatInVec sum( const Vector4 &vec )
+{
+    const __m128 xPlusY = _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 zPlusW = _mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) );
+    return floatInVec( _mm_add_ps( xPlusY, zPlusW ) );
+}
+
+inline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
+}
+
+inline const floatInVec lengthSqr( const Vector4 &vec )
+{
+    return floatInVec(  _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
+}
+
+// Length: square root (_mm_sqrt_ps) of the 4-D self dot product, read from element 0.
+inline const floatInVec length( const Vector4 &vec )
+{
+    const __m128 selfDot = _vmathVfDot4( vec.get128(), vec.get128() );
+    const __m128 root = _mm_sqrt_ps( selfDot );
+    return floatInVec( root, 0 );
+}
+
+// Scale vec by the reciprocal square root of its 4-D squared length.
+// NOTE: _mm_rsqrt_ps is a reduced-precision approximation, so the result is only approximately
+// unit length, and there is no guard for a zero-length input.
+// NOTE(review): relies on _vmathVfDot4 placing the dot product in every lane -- confirm.
+inline const Vector4 normalize( const Vector4 &vec )
+{
+    const __m128 lenSqr = _vmathVfDot4( vec.get128(), vec.get128() );
+    const __m128 invLen = _mm_rsqrt_ps( lenSqr );
+    return Vector4( _mm_mul_ps( vec.get128(), invLen ) );
+}
+
+inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 )
+{
+    return select( vec0, vec1, boolInVec(select1) );
+}
+
+inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 )
+{
+    return Vector4( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: print all four elements of vec as "( x y z w )" followed by a newline.
+inline void print( const Vector4 &vec )
+{
+    // view the quad as four floats so that single lanes can be passed to printf
+    union { __m128 quad; float lane[4]; } view;
+    view.quad = vec.get128();
+    printf( "( %f %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2], view.lane[3] );
+}
+
+// Debug helper: print "name: ( x y z w )" using all four elements of vec, followed by a newline.
+inline void print( const Vector4 &vec, const char * name )
+{
+    // view the quad as four floats so that single lanes can be passed to printf
+    union { __m128 quad; float lane[4]; } view;
+    view.quad = vec.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2], view.lane[3] );
+}
+
+#endif
+
+// Build a Point3 whose elements 0..2 are _x, _y, _z; element 3 is set to 0.
+inline Point3::Point3( float _x, float _y, float _z )
+    : mVec128( _mm_setr_ps( _x, _y, _z, 0.0f ) )
+{
+}
+
+// Gather element 0 of three floatInVec values into elements 0..2.
+// _mm_unpacklo_ps interleaves the low two lanes of its operands, so interleaving
+// (x0 z0 ..) with (y0 y1 ..) produces x0 y0 z0 y1; element 3 therefore receives lane 1 of _y.
+inline Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
+{
+    const __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() );
+    mVec128 = _mm_unpacklo_ps( xz, _y.get128() );
+}
+
+inline Point3::Point3( const Vector3 &vec )
+{
+    mVec128 = vec.get128();
+}
+
+inline Point3::Point3( float scalar )
+{
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Point3::Point3( const floatInVec &scalar )
+{
+    mVec128 = scalar.get128();
+}
+
+inline Point3::Point3( __m128 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return lerp( floatInVec(t), pnt0, pnt1 );
+}
+
+// Linear interpolation between two points: pnt0 + ( pnt1 - pnt0 ) * t.
+inline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 )
+{
+    const Vector3 delta = pnt1 - pnt0;
+    return pnt0 + ( delta * t );
+}
+
+inline __m128 Point3::get128( ) const
+{
+    return mVec128;
+}
+
+// Write elements 0..2 of pnt into *quad while keeping the quad's existing element 3.
+// NOTE(review): assumes vec_sel takes the second operand where mask bits are set -- confirm.
+inline void storeXYZ( const Point3 &pnt, __m128 * quad )
+{
+    __declspec(align(16)) unsigned int keepLastLane[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
+    const __m128 previous = *quad;
+    *quad = vec_sel( pnt.get128(), previous, keepLastLane );
+}
+
+// Unpack four Point3 values from three consecutive quads laid out as
+//   x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3
+// threeQuads must be 16-byte aligned (aligned loads are used on the first and last quad).
+// Only elements 0..2 of each output are meaningful: element 3 holds whichever packed float
+// follows z in the input (z3 repeated for pnt3).
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads )
+{
+    const float *quads = (const float *)threeQuads;   // keep the const qualifier of the input
+    pnt0 = Point3(  _mm_load_ps(quads) );
+    pnt1 = Point3( _mm_loadu_ps(quads + 3) );
+    pnt2 = Point3( _mm_loadu_ps(quads + 6) );
+    // An unaligned load at quads + 9 would read floats 9..12, i.e. one float past the 12-float
+    // input. Load the last quad (floats 8..11 = z2 x3 y3 z3) and shift x3 y3 z3 down instead.
+    const __m128 lastQuad = _mm_load_ps(quads + 8);
+    pnt3 = Point3( _mm_shuffle_ps( lastQuad, lastQuad, _MM_SHUFFLE(3, 3, 2, 1) ) );
+}
+
+// Pack the x/y/z elements of four Point3 values into three consecutive quads:
+//   threeQuads[0] = x0 y0 z0 x1,  threeQuads[1] = y1 z1 x2 y2,  threeQuads[2] = z2 x3 y3 z3
+// NOTE(review): layout assumes vec_sel takes the second operand where mask bits are set -- confirm.
+inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads )
+{
+    __declspec(align(16)) unsigned int lastLaneMask[4] = {0, 0, 0, 0xffffffff};
+    __declspec(align(16)) unsigned int firstLaneMask[4] = {0xffffffff, 0, 0, 0};
+    __m128 x1Splat = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
+    __m128 z2Splat = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
+    threeQuads[0] = vec_sel( pnt0.get128(), x1Splat, lastLaneMask );
+    // elements 1,2 of pnt1 followed by elements 0,1 of pnt2
+    threeQuads[1] = _mm_shuffle_ps( pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
+    // rotate pnt3 to (w x y z) so that x/y/z land in lanes 1..3
+    __m128 rotated3 = _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) );
+    threeQuads[2] = vec_sel( rotated3, z2Splat, firstLaneMask );
+}
+/*
+inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads )
+{
+#if 0
+    __m128 xyz0[3];
+    __m128 xyz1[3];
+    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
+    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+#else
+	assert(0);
+#endif
+}
+*/
+inline Point3 & Point3::operator =( const Point3 &pnt )
+{
+    mVec128 = pnt.mVec128;
+    return *this;
+}
+
+inline Point3 & Point3::setX( float _x )
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Point3 & Point3::setX( const floatInVec &_x )
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Point3::getX( ) const
+{
+    return floatInVec( mVec128, 0 );
+}
+
+inline Point3 & Point3::setY( float _y )
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Point3 & Point3::setY( const floatInVec &_y )
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Point3::getY( ) const
+{
+    return floatInVec( mVec128, 1 );
+}
+
+inline Point3 & Point3::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Point3 & Point3::setZ( const floatInVec &_z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Point3::getZ( ) const
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Point3 & Point3::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Point3 & Point3::setElem( int idx, const floatInVec &value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Point3::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Point3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Point3::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+// Difference of two points, lane by lane, returned as a Vector3.
+inline const Vector3 Point3::operator -( const Point3 &pnt ) const
+{
+    const __m128 offset = _mm_sub_ps( mVec128, pnt.mVec128 );
+    return Vector3( offset );
+}
+
+inline const Point3 Point3::operator +( const Vector3 &vec ) const
+{
+    return Point3( _mm_add_ps( mVec128, vec.get128() ) );
+}
+
+inline const Point3 Point3::operator -( const Vector3 &vec ) const
+{
+    return Point3( _mm_sub_ps( mVec128, vec.get128() ) );
+}
+
+// In-place translate by vec; built on operator +( Vector3 ) and assignment, returns *this.
+inline Point3 & Point3::operator +=( const Vector3 &vec )
+{
+    return ( *this = *this + vec );
+}
+
+// In-place translate by -vec; built on operator -( Vector3 ) and assignment, returns *this.
+inline Point3 & Point3::operator -=( const Vector3 &vec )
+{
+    return ( *this = *this - vec );
+}
+
+inline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) );
+}
+
+// Per-element reciprocal of pnt.
+// NOTE: _mm_rcp_ps is a reduced-precision approximation of 1/x, not an exact division.
+inline const Point3 recipPerElem( const Point3 &pnt )
+{
+    const __m128 approxRecip = _mm_rcp_ps( pnt.get128() );
+    return Point3( approxRecip );
+}
+
+inline const Point3 absPerElem( const Point3 &pnt )
+{
+    return Point3( fabsf4( pnt.get128() ) );
+}
+
+// Combine the non-sign bits of pnt0 with the remaining (sign) bits of pnt1, per element.
+// NOTE(review): assumes toM128(0x7fffffff) replicates that bit pattern into every lane -- confirm.
+inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    const __m128 nonSignBits = toM128(0x7fffffff);
+    const __m128 magnitude = _mm_and_ps( nonSignBits, pnt0.get128() );     // value bits of pnt0
+    const __m128 signOnly = _mm_andnot_ps( nonSignBits, pnt1.get128() );   // ~mask & pnt1
+    return Point3( _mm_or_ps( magnitude, signOnly ) );
+}
+
+inline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const floatInVec maxElem( const Point3 &pnt )
+{
+    return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
+}
+
+inline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const floatInVec minElem( const Point3 &pnt )
+{
+    return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
+}
+
+inline const floatInVec sum( const Point3 &pnt )
+{
+    return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
+}
+
+// Uniform scale: wraps the float in a floatInVec and forwards to the floatInVec overload.
+inline const Point3 scale( const Point3 &pnt, float scaleVal )
+{
+    const floatInVec factor( scaleVal );
+    return scale( pnt, factor );
+}
+
+// Uniform scale: per-element product of pnt with a Point3 built from scaleVal.
+inline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal )
+{
+    const Point3 factors( scaleVal );
+    return mulPerElem( pnt, factors );
+}
+
+// Non-uniform scale: per-element product of pnt with a Point3 built from scaleVec.
+inline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec )
+{
+    const Point3 factors( scaleVec );
+    return mulPerElem( pnt, factors );
+}
+
+// 3-D dot product of pnt with unitVec; the scalar is taken from element 0 of the helper result.
+inline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec )
+{
+    const __m128 product = _vmathVfDot3( pnt.get128(), unitVec.get128() );
+    return floatInVec( product, 0 );
+}
+
+// Squared distance from the origin: lengthSqr of the point reinterpreted as a Vector3.
+inline const floatInVec distSqrFromOrigin( const Point3 &pnt )
+{
+    const Vector3 fromOrigin( pnt );
+    return lengthSqr( fromOrigin );
+}
+
+inline const floatInVec distFromOrigin( const Point3 &pnt )
+{
+    return length( Vector3( pnt ) );
+}
+
+// Squared distance between two points: lengthSqr of ( pnt1 - pnt0 ).
+inline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    const Vector3 offset = pnt1 - pnt0;
+    return lengthSqr( offset );
+}
+
+// Distance between two points: length of ( pnt1 - pnt0 ).
+inline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    const Vector3 offset = pnt1 - pnt0;
+    return length( offset );
+}
+
+inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 )
+{
+    return select( pnt0, pnt1, boolInVec(select1) );
+}
+
+inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 )
+{
+    return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: print elements 0..2 of pnt as "( x y z )" followed by a newline.
+inline void print( const Point3 &pnt )
+{
+    // view the quad as four floats so that single lanes can be passed to printf
+    union { __m128 quad; float lane[4]; } view;
+    view.quad = pnt.get128();
+    printf( "( %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2] );
+}
+
+// Debug helper: print "name: ( x y z )" using elements 0..2 of pnt, followed by a newline.
+inline void print( const Point3 &pnt, const char * name )
+{
+    // view the quad as four floats so that single lanes can be passed to printf
+    union { __m128 quad; float lane[4]; } view;
+    view.quad = pnt.get128();
+    printf( "%s: ( %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h
index c5c6eb908..14dc408d0 100644
--- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vecidx_aos.h
@@ -1,80 +1,80 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VECIDX_AOS_H
-#define _VECTORMATH_VECIDX_AOS_H
-
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// VecIdx 
-// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
-// subscripting operator.
-//
-
-__declspec(align(16)) class VecIdx
-{
-private:
-   __m128 &ref;
-   int i;
-public:
-    inline VecIdx( __m128& vec, int idx ): ref(vec) { i = idx; }
-
-    // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined
-    // in which case, implicitly casts to floatInVec, and one must call
-    // getAsFloat to convert to float.
-    //
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-    inline operator floatInVec() const;
-    inline float getAsFloat() const;
-#else
-    inline operator float() const;
-#endif
-
-    inline float operator =( float scalar );
-    inline floatInVec operator =( const floatInVec &scalar );
-    inline floatInVec operator =( const VecIdx& scalar );
-    inline floatInVec operator *=( float scalar );
-    inline floatInVec operator *=( const floatInVec &scalar );
-    inline floatInVec operator /=( float scalar );
-    inline floatInVec operator /=( const floatInVec &scalar );
-    inline floatInVec operator +=( float scalar );
-    inline floatInVec operator +=( const floatInVec &scalar );
-    inline floatInVec operator -=( float scalar );
-    inline floatInVec operator -=( const floatInVec &scalar );
-};
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VECIDX_AOS_H
+#define _VECTORMATH_VECIDX_AOS_H
+
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// VecIdx 
+// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
+// subscripting operator.
+//
+
+// Proxy returned by the non-const subscript operators: it remembers which quad and which
+// element index were addressed so that reads and (compound) assignments can be applied to
+// that single element. Only declarations live here; the member definitions are elsewhere.
+// NOTE(review): with MSVC a __declspec placed before the `class` keyword applies to a declared
+// variable rather than to the type, so this align(16) probably has no effect here -- confirm.
+__declspec(align(16)) class VecIdx
+{
+private:
+   __m128 &ref;   // the quad being indexed; held by reference, so it must outlive this proxy
+   int i;         // element index within ref
+public:
+    // Bind the proxy to element idx of vec.
+    inline VecIdx( __m128& vec, int idx ): ref(vec) { i = idx; }
+
+    // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined
+    // in which case, implicitly casts to floatInVec, and one must call
+    // getAsFloat to convert to float.
+    //
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+    inline operator floatInVec() const;
+    inline float getAsFloat() const;
+#else
+    inline operator float() const;
+#endif
+
+    // Assignment and compound assignment on the referenced element. Note that these return
+    // the value by copy (float or floatInVec), not a reference to the proxy.
+    inline float operator =( float scalar );
+    inline floatInVec operator =( const floatInVec &scalar );
+    inline floatInVec operator =( const VecIdx& scalar );
+    inline floatInVec operator *=( float scalar );
+    inline floatInVec operator *=( const floatInVec &scalar );
+    inline floatInVec operator /=( float scalar );
+    inline floatInVec operator /=( const floatInVec &scalar );
+    inline floatInVec operator +=( float scalar );
+    inline floatInVec operator +=( const floatInVec &scalar );
+    inline floatInVec operator -=( float scalar );
+    inline floatInVec operator -=( const floatInVec &scalar );
+};
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h
index 94825a583..c07fa3acc 100644
--- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h
@@ -1,2491 +1,2491 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_CPP_SSE_H
-#define _VECTORMATH_AOS_CPP_SSE_H
-
-#include <math.h>
-#include <xmmintrin.h>
-#include <emmintrin.h>
-#include <assert.h>
-
-// TODO: Tidy
-typedef __m128 vec_float4;
-typedef __m128 vec_uint4;
-typedef __m128 vec_int4;
-typedef __m128i vec_uchar16;
-typedef __m128i vec_ushort8;
-
-#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
-
-#define _mm_ror_ps(vec,i)	\
-	(((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(i+3)%4,(unsigned char)(i+2)%4,(unsigned char)(i+1)%4,(unsigned char)(i+0)%4))) : (vec))
-#define _mm_rol_ps(vec,i)	\
-	(((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-i)%4,(unsigned char)(6-i)%4,(unsigned char)(5-i)%4,(unsigned char)(4-i)%4))) : (vec))
-
-#define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4))
-
-#define _mm_abs_ps(vec)		_mm_andnot_ps(_MASKSIGN_,vec)
-#define _mm_neg_ps(vec)		_mm_xor_ps(_MASKSIGN_,vec)
-
-#define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) )
-
-union SSEFloat
-{
-	__m128 m128;
-	float f[4];
-};
-
-static inline __m128 vec_sel(__m128 a, __m128 b, __m128 mask)
-{
-	return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a));
-}
-static inline __m128 vec_sel(__m128 a, __m128 b, const unsigned int *_mask)
-{
-	return vec_sel(a, b, _mm_load_ps((float *)_mask));
-}
-static inline __m128 vec_sel(__m128 a, __m128 b, unsigned int _mask)
-{
-	return vec_sel(a, b, _mm_set1_ps(*(float *)&_mask));
-}
-
-static inline __m128 toM128(unsigned int x)
-{
-    return _mm_set1_ps( *(float *)&x );
-}
-
-static inline __m128 fabsf4(__m128 x)
-{
-    return _mm_and_ps( x, toM128( 0x7fffffff ) );
-}
-/*
-union SSE64
-{
-	__m128 m128;
-	struct
-	{
-		__m64 m01;
-		__m64 m23;
-	} m64;
-};
-
-static inline __m128 vec_cts(__m128 x, int a)
-{
-	assert(a == 0); // Only 2^0 supported
-	(void)a;
-	SSE64 sse64;
-	sse64.m64.m01 = _mm_cvttps_pi32(x);
-	sse64.m64.m23 = _mm_cvttps_pi32(_mm_ror_ps(x,2));
-	_mm_empty();
-    return sse64.m128;
-}
-
-static inline __m128 vec_ctf(__m128 x, int a)
-{
-	assert(a == 0); // Only 2^0 supported
-	(void)a;
-	SSE64 sse64;
-	sse64.m128 = x;
-	__m128 result =_mm_movelh_ps(
-		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01),
-		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23));
-	_mm_empty();
-	return result;
-}
-*/
-static inline __m128 vec_cts(__m128 x, int a)
-{
-	assert(a == 0); // Only 2^0 supported
-	(void)a;
-	__m128i result = _mm_cvtps_epi32(x);
-    return (__m128 &)result;
-}
-
-static inline __m128 vec_ctf(__m128 x, int a)
-{
-	assert(a == 0); // Only 2^0 supported
-	(void)a;
-	return _mm_cvtepi32_ps((__m128i &)x);
-}
-
-#define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) )
-#define vec_sub(a,b) _mm_sub_ps( a, b )
-#define vec_add(a,b) _mm_add_ps( a, b )
-#define vec_mul(a,b) _mm_mul_ps( a, b )
-#define vec_xor(a,b) _mm_xor_ps( a, b )
-#define vec_and(a,b) _mm_and_ps( a, b )
-#define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b )
-#define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b )
-
-#define vec_mergeh(a,b) _mm_unpacklo_ps( a, b )
-#define vec_mergel(a,b) _mm_unpackhi_ps( a, b )
-
-#define vec_andc(a,b) _mm_andnot_ps( b, a )
-
-#define sqrtf4(x) _mm_sqrt_ps( x )
-#define rsqrtf4(x) _mm_rsqrt_ps( x )
-#define recipf4(x) _mm_rcp_ps( x )
-#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x )
-
-static inline __m128 acosf4(__m128 x)
-{
-    __m128 xabs = fabsf4(x);
-	__m128 select = _mm_cmplt_ps( x, _mm_setzero_ps() );
-    __m128 t1 = sqrtf4(vec_sub(_mm_set1_ps(1.0f), xabs));
-    
-    /* Instruction counts can be reduced if the polynomial was
-     * computed entirely from nested (dependent) fma's. However, 
-     * to reduce the number of pipeline stalls, the polygon is evaluated 
-     * in two halves (hi amd lo). 
-     */
-    __m128 xabs2 = _mm_mul_ps(xabs,  xabs);
-    __m128 xabs4 = _mm_mul_ps(xabs2, xabs2);
-    __m128 hi = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0012624911f),
-		xabs, _mm_set1_ps(0.0066700901f)),
-			xabs, _mm_set1_ps(-0.0170881256f)),
-				xabs, _mm_set1_ps( 0.0308918810f));
-    __m128 lo = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0501743046f),
-		xabs, _mm_set1_ps(0.0889789874f)),
-			xabs, _mm_set1_ps(-0.2145988016f)),
-				xabs, _mm_set1_ps( 1.5707963050f));
-    
-    __m128 result = vec_madd(hi, xabs4, lo);
-    
-    // Adjust the result if x is negactive.
-    return vec_sel(
-		vec_mul(t1, result),									// Positive
-		vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)),	// Negative
-		select);
-}
-
-static inline __m128 sinf4(vec_float4 x)
-{
-
-//
-// Common constants used to evaluate sinf4/cosf4/tanf4
-//
-#define _SINCOS_CC0  -0.0013602249f
-#define _SINCOS_CC1   0.0416566950f
-#define _SINCOS_CC2  -0.4999990225f
-#define _SINCOS_SC0  -0.0001950727f
-#define _SINCOS_SC1   0.0083320758f
-#define _SINCOS_SC2  -0.1666665247f
-
-#define _SINCOS_KC1  1.57079625129f
-#define _SINCOS_KC2  7.54978995489e-8f
-
-    vec_float4 xl,xl2,xl3,res;
-
-    // Range reduction using : xl = angle * TwoOverPi;
-    //  
-    xl = vec_mul(x, _mm_set1_ps(0.63661977236f));
-
-    // Find the quadrant the angle falls in
-    // using:  q = (int) (ceil(abs(xl))*sign(xl))
-    //
-    vec_int4 q = vec_cts(xl,0);
-
-    // Compute an offset based on the quadrant that the angle falls in
-    // 
-    vec_int4 offset = _mm_and_ps(q,toM128(0x3));
-
-    // Remainder in range [-pi/4..pi/4]
-    //
-    vec_float4 qf = vec_ctf(q,0);
-    xl  = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x));
-    
-    // Compute x^2 and x^3
-    //
-    xl2 = vec_mul(xl,xl);
-    xl3 = vec_mul(xl2,xl);
-    
-    // Compute both the sin and cos of the angles
-    // using a polynomial expression:
-    //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
-    //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
-    //
-    
-    vec_float4 cx =
-		vec_madd(
-			vec_madd(
-				vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f));
-    vec_float4 sx =
-		vec_madd(
-			vec_madd(
-				vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl);
-
-    // Use the cosine when the offset is odd and the sin
-    // when the offset is even
-    //
-    res = vec_sel(cx,sx,vec_cmpeq(vec_and(offset,
-                                          toM128(0x1)),
-										  _mm_setzero_ps()));
-
-    // Flip the sign of the result when (offset mod 4) = 1 or 2
-    //
-    return vec_sel(
-		vec_xor(toM128(0x80000000U), res),	// Negative
-		res,								// Positive
-		vec_cmpeq(vec_and(offset,toM128(0x2)),_mm_setzero_ps()));
-}
-
-static inline void sincosf4(vec_float4 x, vec_float4* s, vec_float4* c)
-{
-    vec_float4 xl,xl2,xl3;
-    vec_int4   offsetSin, offsetCos;
-
-    // Range reduction using : xl = angle * TwoOverPi;
-    //  
-    xl = vec_mul(x, _mm_set1_ps(0.63661977236f));
-
-    // Find the quadrant the angle falls in
-    // using:  q = (int) (ceil(abs(xl))*sign(xl))
-    //
-    //vec_int4 q = vec_cts(vec_add(xl,vec_sel(_mm_set1_ps(0.5f),xl,(0x80000000))),0);
-    vec_int4 q = vec_cts(xl,0);
-     
-    // Compute the offset based on the quadrant that the angle falls in.
-    // Add 1 to the offset for the cosine. 
-    //
-    offsetSin = vec_and(q,toM128((int)0x3));
-	__m128i temp = _mm_add_epi32(_mm_set1_epi32(1),(__m128i &)offsetSin);
-	offsetCos = (__m128 &)temp;
-
-    // Remainder in range [-pi/4..pi/4]
-    //
-    vec_float4 qf = vec_ctf(q,0);
-    xl  = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x));
-    
-    // Compute x^2 and x^3
-    //
-    xl2 = vec_mul(xl,xl);
-    xl3 = vec_mul(xl2,xl);
-    
-    // Compute both the sin and cos of the angles
-    // using a polynomial expression:
-    //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
-    //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
-    //
-    vec_float4 cx =
-		vec_madd(
-			vec_madd(
-				vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f));
-    vec_float4 sx =
-		vec_madd(
-			vec_madd(
-				vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl);
-
-    // Use the cosine when the offset is odd and the sin
-    // when the offset is even
-    //
-    vec_uint4 sinMask = (vec_uint4)vec_cmpeq(vec_and(offsetSin,toM128(0x1)),_mm_setzero_ps());
-    vec_uint4 cosMask = (vec_uint4)vec_cmpeq(vec_and(offsetCos,toM128(0x1)),_mm_setzero_ps());    
-    *s = vec_sel(cx,sx,sinMask);
-    *c = vec_sel(cx,sx,cosMask);
-
-    // Flip the sign of the result when (offset mod 4) = 1 or 2
-    //
-    sinMask = vec_cmpeq(vec_and(offsetSin,toM128(0x2)),_mm_setzero_ps());
-    cosMask = vec_cmpeq(vec_and(offsetCos,toM128(0x2)),_mm_setzero_ps());
-    
-    *s = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*s),*s,sinMask);
-    *c = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*c),*c,cosMask);    
-}
-
-#include "vecidx_aos.h"
-#include "floatInVec.h"
-#include "boolInVec.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-namespace Vectormath {
-
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A 3-D vector in array-of-structures format
-//
-class Vector3
-{
-    __m128 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( float x, float y, float z );
-
-    // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type)
-    // 
-    inline Vector3( const floatInVec &x, const floatInVec &y, const floatInVec &z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( const Point3 &pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( float scalar );
-
-    // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Vector3( const floatInVec &scalar );
-
-    // Set vector float data in a 3-D vector
-    // 
-    explicit inline Vector3( __m128 vf4 );
-
-    // Get vector float data from a 3-D vector
-    // 
-    inline __m128 get128( ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( const Vector3 &vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( float x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( float y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( float z );
-
-    // Set the x element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setX( const floatInVec &x );
-
-    // Set the y element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setY( const floatInVec &y );
-
-    // Set the z element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setZ( const floatInVec &z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, float value );
-
-    // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setElem( int idx, const floatInVec &value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( const Vector3 &vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( const Vector3 &vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( const Point3 &pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( float scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( float scalar ) const;
-
-    // Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector3 operator *( const floatInVec &scalar ) const;
-
-    // Divide a 3-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector3 operator /( const floatInVec &scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( const Vector3 &vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( const Vector3 &vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector3 & operator *=( const floatInVec &scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector3 & operator /=( const floatInVec &scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( float scalar, const Vector3 &vec );
-
-// Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
-// 
-inline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( const Vector3 &vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( const Vector3 &vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline const floatInVec maxElem( const Vector3 &vec );
-
-// Minimum element of a 3-D vector
-// 
-inline const floatInVec minElem( const Vector3 &vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline const floatInVec sum( const Vector3 &vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline const floatInVec lengthSqr( const Vector3 &vec );
-
-// Compute the length of a 3-D vector
-// 
-inline const floatInVec length( const Vector3 &vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( const Vector3 &vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( const Vector3 &vec0, const Vector3 &vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// NOTE: 
-// Slower than column post-multiply.
-// 
-inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( const Vector3 &vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 );
-
-// Linear interpolation between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 );
-
-// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 );
-
-// Conditionally select between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 );
-
-// Store x, y, and z elements of 3-D vector in first three words of a quadword, preserving fourth word
-// 
-inline void storeXYZ( const Vector3 &vec, __m128 * quad );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads );
-
-// Store four 3-D vectors in three quadwords
-// 
-inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads );
-
-// Store eight 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 &vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 &vec, const char * name );
-
-#endif
-
-// A 4-D vector in array-of-structures format
-//
-class Vector4
-{
-    __m128 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( float x, float y, float z, float w );
-
-    // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type)
-    // 
-    inline Vector4( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( const Vector3 &xyz, float w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4( const Vector3 &xyz, const floatInVec &w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( const Vector3 &vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( const Point3 &pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( const Quat &quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( float scalar );
-
-    // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Vector4( const floatInVec &scalar );
-
-    // Set vector float data in a 4-D vector
-    // 
-    explicit inline Vector4( __m128 vf4 );
-
-    // Get vector float data from a 4-D vector
-    // 
-    inline __m128 get128( ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( const Vector4 &vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( const Vector3 &vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( float x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( float y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( float z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( float w );
-
-    // Set the x element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setX( const floatInVec &x );
-
-    // Set the y element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setY( const floatInVec &y );
-
-    // Set the z element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setZ( const floatInVec &z );
-
-    // Set the w element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setW( const floatInVec &w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline const floatInVec getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, float value );
-
-    // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setElem( int idx, const floatInVec &value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( const Vector4 &vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( const Vector4 &vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( float scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( float scalar ) const;
-
-    // Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector4 operator *( const floatInVec &scalar ) const;
-
-    // Divide a 4-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector4 operator /( const floatInVec &scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( const Vector4 &vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( const Vector4 &vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4 & operator *=( const floatInVec &scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4 & operator /=( const floatInVec &scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( float scalar, const Vector4 &vec );
-
-// Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
-// 
-inline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( const Vector4 &vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( const Vector4 &vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline const floatInVec maxElem( const Vector4 &vec );
-
-// Minimum element of a 4-D vector
-// 
-inline const floatInVec minElem( const Vector4 &vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline const floatInVec sum( const Vector4 &vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline const floatInVec lengthSqr( const Vector4 &vec );
-
-// Compute the length of a 4-D vector
-// 
-inline const floatInVec length( const Vector4 &vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( const Vector4 &vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( const Vector4 &vec0, const Vector4 &vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 );
-
-// Linear interpolation between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 );
-
-// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 );
-
-// Conditionally select between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 );
-
-// Store four 4-D vectors as half-floats
-// 
-inline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 &vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 &vec, const char * name );
-
-#endif
-
-// A 3-D point in array-of-structures format
-//
-class Point3
-{
-    __m128 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( float x, float y, float z );
-
-    // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type)
-    // 
-    inline Point3( const floatInVec &x, const floatInVec &y, const floatInVec &z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( const Vector3 &vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( float scalar );
-
-    // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Point3( const floatInVec &scalar );
-
-    // Set vector float data in a 3-D point
-    // 
-    explicit inline Point3( __m128 vf4 );
-
-    // Get vector float data from a 3-D point
-    // 
-    inline __m128 get128( ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( const Point3 &pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( float x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( float y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( float z );
-
-    // Set the x element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setX( const floatInVec &x );
-
-    // Set the y element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setY( const floatInVec &y );
-
-    // Set the z element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setZ( const floatInVec &z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, float value );
-
-    // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type)
-    // 
-    inline Point3 & setElem( int idx, const floatInVec &value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( const Point3 &pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( const Vector3 &vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( const Vector3 &vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( const Vector3 &vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( const Vector3 &vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( const Point3 &pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( const Point3 &pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline const floatInVec maxElem( const Point3 &pnt );
-
-// Minimum element of a 3-D point
-// 
-inline const floatInVec minElem( const Point3 &pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline const floatInVec sum( const Point3 &pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 &pnt, float scaleVal );
-
-// Apply uniform scale to a 3-D point (scalar data contained in vector data type)
-// 
-inline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline const floatInVec distSqrFromOrigin( const Point3 &pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline const floatInVec distFromOrigin( const Point3 &pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 );
-
-// Linear interpolation between two 3-D points (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 );
-
-// Conditionally select between two 3-D points (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 );
-
-// Store x, y, and z elements of 3-D point in first three words of a quadword, preserving fourth word
-// 
-inline void storeXYZ( const Point3 &pnt, __m128 * quad );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads );
-
-// Store four 3-D points in three quadwords
-// 
-inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads );
-
-// Store eight 3-D points as half-floats
-// 
-inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 &pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 &pnt, const char * name );
-
-#endif
-
-// A quaternion in array-of-structures format
-//
-class Quat
-{
-    __m128 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( float x, float y, float z, float w );
-
-    // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type)
-    // 
-    inline Quat( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( const Vector3 &xyz, float w );
-
-    // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat( const Vector3 &xyz, const floatInVec &w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( const Vector4 &vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( float scalar );
-
-    // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Quat( const floatInVec &scalar );
-
-    // Set vector float data in a quaternion
-    // 
-    explicit inline Quat( __m128 vf4 );
-
-    // Get vector float data from a quaternion
-    // 
-    inline __m128 get128( ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( const Quat &quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( const Vector3 &vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( float x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( float y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( float z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( float w );
-
-    // Set the x element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setX( const floatInVec &x );
-
-    // Set the y element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setY( const floatInVec &y );
-
-    // Set the z element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setZ( const floatInVec &z );
-
-    // Set the w element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setW( const floatInVec &w );
-
-    // Get the x element of a quaternion
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline const floatInVec getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, float value );
-
-    // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type)
-    // 
-    inline Quat & setElem( int idx, const floatInVec &value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( const Quat &quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( const Quat &quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( const Quat &quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( float scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( float scalar ) const;
-
-    // Multiply a quaternion by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Quat operator *( const floatInVec &scalar ) const;
-
-    // Divide a quaternion by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Quat operator /( const floatInVec &scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( const Quat &quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( const Quat &quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( const Quat &quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat & operator *=( const floatInVec &scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat & operator /=( const floatInVec &scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( float radians, const Vector3 &unitVec );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotation( const floatInVec &radians, const Vector3 &unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( float radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( float radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( float radians );
-
-    // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationX( const floatInVec &radians );
-
-    // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationY( const floatInVec &radians );
-
-    // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationZ( const floatInVec &radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( float scalar, const Quat &quat );
-
-// Multiply a quaternion by a scalar (scalar data contained in vector data type)
-// 
-inline const Quat operator *( const floatInVec &scalar, const Quat &quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( const Quat &quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( const Quat &unitQuat, const Vector3 &vec );
-
-// Compute the dot product of two quaternions
-// 
-inline const floatInVec dot( const Quat &quat0, const Quat &quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline const floatInVec norm( const Quat &quat );
-
-// Compute the length of a quaternion
-// 
-inline const floatInVec length( const Quat &quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( const Quat &quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 );
-
-// Linear interpolation between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 );
-
-// Spherical linear interpolation between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 );
-
-// Spherical quadrangle interpolation (scalar data contained in vector data type)
-// 
-inline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 );
-
-// Conditionally select between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat &quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat &quat, const char * name );
-
-#endif
-
-// A 3x3 matrix in array-of-structures format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( const Quat &unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( float scalar );
-
-    // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Matrix3( const floatInVec &scalar );
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( const Vector3 &col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( const Vector3 &col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( const Vector3 &col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, const Vector3 &vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, const Vector3 &vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, float val );
-
-    // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Matrix3 & setElem( int col, int row, const floatInVec &val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( float scalar ) const;
-
-    // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Matrix3 operator *( const floatInVec &scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 &vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Matrix3 & operator *=( const floatInVec &scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( float radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( float radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( float radians );
-
-    // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationX( const floatInVec &radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationY( const floatInVec &radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationZ( const floatInVec &radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( const Vector3 &radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( float radians, const Vector3 &unitVec );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotation( const floatInVec &radians, const Vector3 &unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( const Quat &unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( const Vector3 &scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
-
-// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
-// 
-inline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline const floatInVec determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
-
-// Conditionally select between two 3x3 matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A 4x4 matrix in array-of-structures format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, const Vector3 &translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( const Quat &unitQuat, const Vector3 &translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( float scalar );
-
-    // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Matrix4( const floatInVec &scalar );
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( const Vector3 &translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( const Vector4 &col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( const Vector4 &col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( const Vector4 &col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( const Vector4 &col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, const Vector4 &vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, const Vector4 &vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, float val );
-
-    // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Matrix4 & setElem( int col, int row, const floatInVec &val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( float scalar ) const;
-
-    // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Matrix4 operator *( const floatInVec &scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( const Vector4 &vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( const Vector3 &vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( const Point3 &pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Matrix4 & operator *=( const floatInVec &scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( float radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( float radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( float radians );
-
-    // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationX( const floatInVec &radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationY( const floatInVec &radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationZ( const floatInVec &radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( const Vector3 &radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( float radians, const Vector3 &unitVec );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotation( const floatInVec &radians, const Vector3 &unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( const Quat &unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( const Vector3 &scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( const Vector3 &translateVec );
-
-    // Construct viewing matrix based on eye, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
-
-// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
-// 
-inline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline const floatInVec determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
-
-// Conditionally select between two 4x4 matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A 3x4 transformation matrix in array-of-structures format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2, const Vector3 &col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, const Vector3 &translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( const Quat &unitQuat, const Vector3 &translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( float scalar );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Transform3( const floatInVec &scalar );
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( const Vector3 &translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( const Vector3 &col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( const Vector3 &col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( const Vector3 &col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( const Vector3 &col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, const Vector3 &vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, const Vector4 &vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, float val );
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Transform3 & setElem( int col, int row, const floatInVec &val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 &vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( const Point3 &pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationX( const floatInVec &radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationY( const floatInVec &radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationZ( const floatInVec &radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( const Vector3 &radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( float radians, const Vector3 &unitVec );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotation( const floatInVec &radians, const Vector3 &unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( const Quat &unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( const Vector3 &scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( const Vector3 &translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
-
-// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_SSE_H
+#define _VECTORMATH_AOS_CPP_SSE_H
+
+#include <math.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <assert.h>
+
+// TODO: Tidy -- AltiVec/SPU-style type names mapped onto SSE types. vec_uint4 and vec_int4 are also __m128, so integer lanes travel as raw bits in a float vector.
+typedef __m128 vec_float4;
+typedef __m128 vec_uint4;
+typedef __m128 vec_int4;
+typedef __m128i vec_uchar16;
+typedef __m128i vec_ushort8;
+
+#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))  // broadcast lane e of x to all four lanes
+// Lane rotations: _mm_ror_ps gives result[k] = vec[(k+i)%4], _mm_rol_ps gives result[k] = vec[(k-i)%4]. i must be a compile-time constant; vec is expanded twice.
+#define _mm_ror_ps(vec,i)	\
+	(((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)((i)+3)%4,(unsigned char)((i)+2)%4,(unsigned char)((i)+1)%4,(unsigned char)((i)+0)%4))) : (vec))
+#define _mm_rol_ps(vec,i)	\
+	(((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-(i))%4,(unsigned char)(6-(i))%4,(unsigned char)(5-(i))%4,(unsigned char)(4-(i))%4))) : (vec))
+// vec_sld: only the rotate case (vec == vec2) is emulated; vec2 is ignored and x is a byte count (4 bytes per lane).
+#define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4))
+// NOTE(review): _MASKSIGN_ is not defined in this part of the file -- confirm a definition exists before using the two macros below.
+#define _mm_abs_ps(vec)		_mm_andnot_ps(_MASKSIGN_,vec)
+#define _mm_neg_ps(vec)		_mm_xor_ps(_MASKSIGN_,vec)
+// vec_madd(a, b, c) = a*b + c, done as a separate multiply and add (not a fused operation).
+#define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) )
+
+union SSEFloat  // overlays one __m128 with four floats so individual lanes can be addressed
+{
+	__m128 m128;   // the whole 128-bit vector
+	float f[4];    // the same 16 bytes viewed as four float lanes
+};
+
+static inline __m128 vec_sel(__m128 a, __m128 b, __m128 mask)  // bitwise select: mask bit set -> take b, mask bit clear -> take a
+{
+	return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a));
+}
+static inline __m128 vec_sel(__m128 a, __m128 b, const unsigned int *_mask)  // _mask: four 32-bit mask words; _mm_load_ps requires 16-byte alignment
+{
+	return vec_sel(a, b, _mm_load_ps((const float *)_mask));
+}
+static inline __m128 vec_sel(__m128 a, __m128 b, unsigned int _mask)  // the same 32-bit mask is applied to every lane
+{
+	return vec_sel(a, b, _mm_castsi128_ps(_mm_set1_epi32((int)_mask)));  // mask is built in the integer domain: no float type punning of the bit pattern
+}
+
+static inline __m128 toM128(unsigned int x)  // replicate the 32-bit pattern x into all four lanes, bits unchanged
+{
+    return _mm_castsi128_ps( _mm_set1_epi32( (int)x ) );  // built as integers, then reinterpreted: avoids punning x through a float
+}
+
+static inline __m128 fabsf4(__m128 x)  // per-lane absolute value
+{
+    return _mm_andnot_ps( toM128( 0x80000000U ), x );  // (~sign bit) & x: clears the sign bit of every lane
+}
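+/* Disabled: MMX-based (__m64) versions of vec_cts/vec_ctf, kept for reference. Unlike the live vec_cts below, this vec_cts truncated (_mm_cvttps_pi32).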
+union SSE64
+{
+	__m128 m128;
+	struct
+	{
+		__m64 m01;
+		__m64 m23;
+	} m64;
+};
+
+static inline __m128 vec_cts(__m128 x, int a)
+{
+	assert(a == 0); // Only 2^0 supported
+	(void)a;
+	SSE64 sse64;
+	sse64.m64.m01 = _mm_cvttps_pi32(x);
+	sse64.m64.m23 = _mm_cvttps_pi32(_mm_ror_ps(x,2));
+	_mm_empty();
+    return sse64.m128;
+}
+
+static inline __m128 vec_ctf(__m128 x, int a)
+{
+	assert(a == 0); // Only 2^0 supported
+	(void)a;
+	SSE64 sse64;
+	sse64.m128 = x;
+	__m128 result =_mm_movelh_ps(
+		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01),
+		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23));
+	_mm_empty();
+	return result;
+}
+*/
+static inline __m128 vec_cts(__m128 x, int a)  // float lanes -> int32 lanes, returned as raw bits inside a __m128
+{
+	(void)a;            // keeps 'a' referenced when assert() is compiled out
+	assert(a == 0);     // Only 2^0 supported
+	const __m128i asInts = _mm_cvtps_epi32(x);  // rounds per the current MXCSR rounding mode (this is not a truncation)
+    return _mm_castsi128_ps(asInts);
+}
+
+static inline __m128 vec_ctf(__m128 x, int a)  // int32 lanes (raw bits inside a __m128) -> float lanes
+{
+	(void)a;            // keeps 'a' referenced when assert() is compiled out
+	assert(a == 0);     // Only 2^0 supported
+	return _mm_cvtepi32_ps(_mm_castps_si128(x));
+}
+
+#define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) )  // c - a*b
+#define vec_sub(a,b) _mm_sub_ps( a, b )
+#define vec_add(a,b) _mm_add_ps( a, b )
+#define vec_mul(a,b) _mm_mul_ps( a, b )
+#define vec_xor(a,b) _mm_xor_ps( a, b )
+#define vec_and(a,b) _mm_and_ps( a, b )
+#define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b )  // floating-point compare; each lane becomes all-ones or all-zero
+#define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b )  // floating-point compare; each lane becomes all-ones or all-zero
+// Lane interleaves: vec_mergeh yields (a0,b0,a1,b1), vec_mergel yields (a2,b2,a3,b3).
+#define vec_mergeh(a,b) _mm_unpacklo_ps( a, b )
+#define vec_mergel(a,b) _mm_unpackhi_ps( a, b )
+// vec_andc(a,b) = a & ~b; the operands are swapped because _mm_andnot_ps complements its first argument.
+#define vec_andc(a,b) _mm_andnot_ps( b, a )
+// Per-lane math helpers; rsqrtf4/recipf4 use the SSE estimate instructions, negatef4 computes 0 - x.
+#define sqrtf4(x) _mm_sqrt_ps( x )
+#define rsqrtf4(x) _mm_rsqrt_ps( x )
+#define recipf4(x) _mm_rcp_ps( x )
+#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x )
+
+static inline __m128 acosf4(__m128 x)  // per-lane arccosine approximation: sqrt(1 - |x|) * P(|x|), reflected for negative x
+{
+    __m128 xabs = fabsf4(x);
+	__m128 select = _mm_cmplt_ps( x, _mm_setzero_ps() );  // all-ones in lanes where x < 0
+    __m128 t1 = sqrtf4(vec_sub(_mm_set1_ps(1.0f), xabs));  // sqrt(1 - |x|); NaN when |x| > 1
+    
+    /* Instruction counts can be reduced if the polynomial was
+     * computed entirely from nested (dependent) fma's. However, 
+     * to reduce the number of pipeline stalls, the polynomial is evaluated 
+     * in two halves (hi and lo). 
+     */
+    __m128 xabs2 = _mm_mul_ps(xabs,  xabs);
+    __m128 xabs4 = _mm_mul_ps(xabs2, xabs2);
+    __m128 hi = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0012624911f),
+		xabs, _mm_set1_ps(0.0066700901f)),
+			xabs, _mm_set1_ps(-0.0170881256f)),
+				xabs, _mm_set1_ps( 0.0308918810f));
+    __m128 lo = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0501743046f),
+		xabs, _mm_set1_ps(0.0889789874f)),
+			xabs, _mm_set1_ps(-0.2145988016f)),
+				xabs, _mm_set1_ps( 1.5707963050f));
+    // Combine the halves: result = hi * |x|^4 + lo, a degree-7 polynomial in |x|.
+    __m128 result = vec_madd(hi, xabs4, lo);
+    
+    // Adjust the result if x is negative: the negative branch returns pi - t1*result.
+    return vec_sel(
+		vec_mul(t1, result),									// Positive
+		vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)),	// Negative
+		select);
+}
+
+static inline __m128 sinf4(vec_float4 x)
+{
+// Per-lane sine of x (radians): reduce x to a quadrant q and a remainder xl, evaluate sin/cos polynomials on xl, then select and sign by quadrant.
+//
+// Common constants used to evaluate sinf4/cosf4/tanf4
+//
+#define _SINCOS_CC0  -0.0013602249f
+#define _SINCOS_CC1   0.0416566950f
+#define _SINCOS_CC2  -0.4999990225f
+#define _SINCOS_SC0  -0.0001950727f
+#define _SINCOS_SC1   0.0083320758f
+#define _SINCOS_SC2  -0.1666665247f
+
+#define _SINCOS_KC1  1.57079625129f
+#define _SINCOS_KC2  7.54978995489e-8f
+
+    vec_float4 xl,xl2,xl3,res;
+
+    // Range reduction using : xl = angle * TwoOverPi;
+    //  
+    xl = vec_mul(x, _mm_set1_ps(0.63661977236f));
+
+    // Find the quadrant the angle falls in.
+    // vec_cts converts with _mm_cvtps_epi32, so q is xl rounded per the current MXCSR mode (nearest by default).
+    //
+    vec_int4 q = vec_cts(xl,0);
+
+    // Compute an offset based on the quadrant: the low two bits of q (integer bits carried in a float vector)
+    // 
+    vec_int4 offset = _mm_and_ps(q,toM128(0x3));
+
+    // Remainder in range [-pi/4..pi/4]: xl = x - q*KC1 - q*KC2, where KC1 + KC2 approximates pi/2
+    //
+    vec_float4 qf = vec_ctf(q,0);
+    xl  = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x));
+    
+    // Compute x^2 and x^3
+    //
+    xl2 = vec_mul(xl,xl);
+    xl3 = vec_mul(xl2,xl);
+    
+    // Compute both the sin and cos of the angles
+    // using a polynomial expression:
+    //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
+    //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
+    //
+    
+    vec_float4 cx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f));
+    vec_float4 sx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl);
+
+    // Use the cosine when the offset is odd and the sin
+    // when the offset is even.
+    // The bit tests use an integer compare: the patterns 1 and 2 are denormal floats, which a float compare treats as 0 when MXCSR.DAZ is set.
+    res = vec_sel(cx,sx,_mm_castsi128_ps(_mm_cmpeq_epi32(
+                      _mm_castps_si128(vec_and(offset,toM128(0x1))),
+                      _mm_setzero_si128())));
+
+    // Flip the sign of the result when bit 1 of the offset is set, i.e. (offset mod 4) = 2 or 3
+    //
+    return vec_sel(
+		vec_xor(toM128(0x80000000U), res),	// Negative
+		res,								// Positive
+		_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offset,toM128(0x2))),_mm_setzero_si128())));
+}
+
+static inline void sincosf4(vec_float4 x, vec_float4* s, vec_float4* c)  // writes per-lane sin(x) to *s and cos(x) to *c (x in radians)
+{
+    vec_float4 xl,xl2,xl3;
+    vec_int4   offsetSin, offsetCos;
+
+    // Range reduction using : xl = angle * TwoOverPi;
+    //  
+    xl = vec_mul(x, _mm_set1_ps(0.63661977236f));
+
+    // Find the quadrant the angle falls in.
+    // vec_cts converts with _mm_cvtps_epi32, so q is xl rounded per the current MXCSR mode (nearest by default);
+    // the earlier add-half-then-convert form is kept below for reference.
+    //vec_int4 q = vec_cts(vec_add(xl,vec_sel(_mm_set1_ps(0.5f),xl,(0x80000000))),0);
+    vec_int4 q = vec_cts(xl,0);
+     
+    // Compute the offset based on the quadrant that the angle falls in.
+    // Add 1 to the offset for the cosine (integer add on the raw lane bits).
+    //
+    offsetSin = vec_and(q,toM128((int)0x3));
+	__m128i temp = _mm_add_epi32(_mm_set1_epi32(1),_mm_castps_si128(offsetSin));
+	offsetCos = _mm_castsi128_ps(temp);
+
+    // Remainder in range [-pi/4..pi/4]: xl = x - q*KC1 - q*KC2, where KC1 + KC2 approximates pi/2
+    //
+    vec_float4 qf = vec_ctf(q,0);
+    xl  = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x));
+    
+    // Compute x^2 and x^3
+    //
+    xl2 = vec_mul(xl,xl);
+    xl3 = vec_mul(xl2,xl);
+    
+    // Compute both the sin and cos of the angles
+    // using a polynomial expression:
+    //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
+    //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
+    //
+    vec_float4 cx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f));
+    vec_float4 sx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl);
+
+    // Use the cosine when the offset is odd and the sin
+    // when the offset is even.
+    // The bit tests use an integer compare: the patterns 1 and 2 are denormal floats, which a float compare treats as 0 when MXCSR.DAZ is set.
+    vec_uint4 sinMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetSin,toM128(0x1))),_mm_setzero_si128()));
+    vec_uint4 cosMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetCos,toM128(0x1))),_mm_setzero_si128()));
+    *s = vec_sel(cx,sx,sinMask);
+    *c = vec_sel(cx,sx,cosMask);
+
+    // Flip the sign of the result when bit 1 of the offset is set, i.e. (offset mod 4) = 2 or 3
+    //
+    sinMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetSin,toM128(0x2))),_mm_setzero_si128()));
+    cosMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetCos,toM128(0x2))),_mm_setzero_si128()));
+    
+    *s = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*s),*s,sinMask);
+    *c = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*c),*c,cosMask);    
+}
+
+#include "vecidx_aos.h"
+#include "floatInVec.h"
+#include "boolInVec.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// (the only data member is a single __m128, mVec128)
+class Vector3
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization, so the elements are indeterminate until assigned
+    // 
+    inline Vector3( ) { };
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    inline Vector3( const floatInVec &x, const floatInVec &y, const floatInVec &z );
+
+    // Copy elements from a 3-D point into a 3-D vector (explicit conversion only)
+    // 
+    explicit inline Vector3( const Point3 &pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value (explicit conversion only)
+    // 
+    explicit inline Vector3( float scalar );
+
+    // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit inline Vector3( const floatInVec &scalar );
+
+    // Construct a 3-D vector directly from vector float data (an __m128 value)
+    // 
+    explicit inline Vector3( __m128 vf4 );
+
+    // Get the vector float data of a 3-D vector as an __m128 value
+    // 
+    inline __m128 get128( ) const;
+
+    // Assign one 3-D vector to another
+    // 
+    inline Vector3 & operator =( const Vector3 &vec );
+
+    // Set the x element of a 3-D vector
+    // 
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( float z );
+
+    // Set the x element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setX( const floatInVec &x );
+
+    // Set the y element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setY( const floatInVec &y );
+
+    // Set the z element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setZ( const floatInVec &z );
+
+    // Get the x element of a 3-D vector (returned as a floatInVec)
+    // 
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D vector (returned as a floatInVec)
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D vector (returned as a floatInVec)
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // 
+    inline Vector3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, or z element of a 3-D vector by index (returned as a floatInVec)
+    // 
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element (returns a VecIdx object, not a float reference)
+    // 
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element of a const 3-D vector
+    // 
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    inline const Vector3 operator +( const Vector3 &vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( const Vector3 &vec ) const;
+
+    // Add a 3-D vector to a 3-D point; the result is a 3-D point
+    // 
+    inline const Point3 operator +( const Point3 &pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector3 operator *( const floatInVec &scalar ) const;
+
+    // Divide a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector3 operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Vector3 & operator +=( const Vector3 &vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( const Vector3 &vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector3 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector3 & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a 3-D vector (unary minus)
+    // 
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis (static member; callable without an object)
+    // 
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis (static member; callable without an object)
+    // 
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis (static member; callable without an object)
+    // 
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar (scalar as the left-hand operand)
+// 
+inline const Vector3 operator *( float scalar, const Vector3 &vec );
+
+// Multiply a 3-D vector by a scalar as the left-hand operand (scalar data contained in vector data type)
+// 
+inline const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 &vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 &vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+inline const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Maximum element of a 3-D vector (returned as a floatInVec)
+// 
+inline const floatInVec maxElem( const Vector3 &vec );
+
+// Minimum element of a 3-D vector (returned as a floatInVec)
+// 
+inline const floatInVec minElem( const Vector3 &vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline const floatInVec sum( const Vector3 &vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline const floatInVec lengthSqr( const Vector3 &vec );
+
+// Compute the length of a 3-D vector
+// 
+inline const floatInVec length( const Vector3 &vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 &vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Outer product of two 3-D vectors; the result is a 3x3 matrix
+// 
+inline const Matrix3 outer( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// NOTE: 
+// Slower than column post-multiply.
+// 
+inline const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 &vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 );
+
+// Linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 );
+
+// Spherical linear interpolation between two 3-D vectors (named unitVec0 and unitVec1 in the signature)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a vector (SIMD) register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 );
+
+// Conditionally select between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 );
+
+// Store x, y, and z elements of 3-D vector in first three words of a quadword (*quad), preserving fourth word
+// 
+inline void storeXYZ( const Vector3 &vec, __m128 * quad );
+
+// Load four three-float 3-D vectors, stored in three quadwords (read through threeQuads)
+// 
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads );
+
+// Store four 3-D vectors in three quadwords (written through threeQuads)
+// 
+inline void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads );
+
+// Store eight 3-D vectors as half-floats (written through threeQuads as vec_ushort8 data)
+// 
+inline void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only declared and defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 &vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only declared and defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 &vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// (the only data member is a single __m128, mVec128)
+class Vector4
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization, so the elements are indeterminate until assigned
+    // 
+    inline Vector4( ) { };
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type)
+    // 
+    inline Vector4( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w );
+
+    // Construct a 4-D vector from a 3-D vector (xyz) and a scalar (w)
+    // 
+    inline Vector4( const Vector3 &xyz, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector4( const Vector3 &xyz, const floatInVec &w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0 (explicit conversion only)
+    // 
+    explicit inline Vector4( const Vector3 &vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1 (explicit conversion only)
+    // 
+    explicit inline Vector4( const Point3 &pnt );
+
+    // Copy elements from a quaternion into a 4-D vector (explicit conversion only)
+    // 
+    explicit inline Vector4( const Quat &quat );
+
+    // Set all elements of a 4-D vector to the same scalar value (explicit conversion only)
+    // 
+    explicit inline Vector4( float scalar );
+
+    // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit inline Vector4( const floatInVec &scalar );
+
+    // Construct a 4-D vector directly from vector float data (an __m128 value)
+    // 
+    explicit inline Vector4( __m128 vf4 );
+
+    // Get the vector float data of a 4-D vector as an __m128 value
+    // 
+    inline __m128 get128( ) const;
+
+    // Assign one 4-D vector to another
+    // 
+    inline Vector4 & operator =( const Vector4 &vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( const Vector3 &vec );
+
+    // Get the x, y, and z elements of a 4-D vector (returned as a 3-D vector)
+    // 
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // 
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( float w );
+
+    // Set the x element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setX( const floatInVec &x );
+
+    // Set the y element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setY( const floatInVec &y );
+
+    // Set the z element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setZ( const floatInVec &z );
+
+    // Set the w element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setW( const floatInVec &w );
+
+    // Get the x element of a 4-D vector (returned as a floatInVec)
+    // 
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 4-D vector (returned as a floatInVec)
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 4-D vector (returned as a floatInVec)
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Get the w element of a 4-D vector (returned as a floatInVec)
+    // 
+    inline const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // 
+    inline Vector4 & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index (returned as a floatInVec)
+    // 
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element (returns a VecIdx object, not a float reference)
+    // 
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element of a const 4-D vector
+    // 
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    inline const Vector4 operator +( const Vector4 &vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( const Vector4 &vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector4 operator *( const floatInVec &scalar ) const;
+
+    // Divide a 4-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector4 operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    inline Vector4 & operator +=( const Vector4 &vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( const Vector4 &vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector4 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector4 & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a 4-D vector (unary minus)
+    // 
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis (static member; callable without an object)
+    // 
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis (static member; callable without an object)
+    // 
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis (static member; callable without an object)
+    // 
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis (static member; callable without an object)
+    // 
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar (scalar as the left-hand operand)
+// 
+inline const Vector4 operator *( float scalar, const Vector4 &vec );
+
+// Multiply a 4-D vector by a scalar as the left-hand operand (scalar data contained in vector data type)
+// 
+inline const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 &vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 &vec );
+
+// Copy sign from one 4-D vector to another, per element
+// 
+inline const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Maximum element of a 4-D vector (returned as a floatInVec)
+// 
+inline const floatInVec maxElem( const Vector4 &vec );
+
+// Minimum element of a 4-D vector (returned as a floatInVec)
+// 
+inline const floatInVec minElem( const Vector4 &vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline const floatInVec sum( const Vector4 &vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline const floatInVec lengthSqr( const Vector4 &vec );
+
+// Compute the length of a 4-D vector
+// 
+inline const floatInVec length( const Vector4 &vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 &vec );
+
+// Outer product of two 4-D vectors; the result is a 4x4 matrix
+// 
+inline const Matrix4 outer( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 );
+
+// Linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 );
+
+// Spherical linear interpolation between two 4-D vectors (named unitVec0 and unitVec1 in the signature)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 );
+
+// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a vector (SIMD) register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 );
+
+// Conditionally select between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 );
+
+// Store four 4-D vectors as half-floats (written through twoQuads as vec_ushort8 data)
+// 
+inline void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only declared and defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 &vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only declared and defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 &vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// (the only data member is a single __m128, mVec128)
+class Point3
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization, so the elements are indeterminate until assigned
+    // 
+    inline Point3( ) { };
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( float x, float y, float z );
+
+    // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    inline Point3( const floatInVec &x, const floatInVec &y, const floatInVec &z );
+
+    // Copy elements from a 3-D vector into a 3-D point (explicit conversion only)
+    // 
+    explicit inline Point3( const Vector3 &vec );
+
+    // Set all elements of a 3-D point to the same scalar value (explicit conversion only)
+    // 
+    explicit inline Point3( float scalar );
+
+    // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit inline Point3( const floatInVec &scalar );
+
+    // Construct a 3-D point directly from vector float data (an __m128 value)
+    // 
+    explicit inline Point3( __m128 vf4 );
+
+    // Get the vector float data of a 3-D point as an __m128 value
+    // 
+    inline __m128 get128( ) const;
+
+    // Assign one 3-D point to another
+    // 
+    inline Point3 & operator =( const Point3 &pnt );
+
+    // Set the x element of a 3-D point
+    // 
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( float z );
+
+    // Set the x element of a 3-D point (scalar data contained in vector data type)
+    // 
+    inline Point3 & setX( const floatInVec &x );
+
+    // Set the y element of a 3-D point (scalar data contained in vector data type)
+    // 
+    inline Point3 & setY( const floatInVec &y );
+
+    // Set the z element of a 3-D point (scalar data contained in vector data type)
+    // 
+    inline Point3 & setZ( const floatInVec &z );
+
+    // Get the x element of a 3-D point (returned as a floatInVec)
+    // 
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D point (returned as a floatInVec)
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D point (returned as a floatInVec)
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // 
+    inline Point3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type)
+    // 
+    inline Point3 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, or z element of a 3-D point by index (returned as a floatInVec)
+    // 
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element (returns a VecIdx object, not a float reference)
+    // 
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element of a const 3-D point
+    // 
+    inline const floatInVec operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point; the result is a 3-D vector
+    // 
+    inline const Vector3 operator -( const Point3 &pnt ) const;
+
+    // Add a 3-D vector to a 3-D point; the result is a 3-D point
+    // 
+    inline const Point3 operator +( const Vector3 &vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point; the result is a 3-D point
+    // 
+    inline const Point3 operator -( const Vector3 &vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Point3 & operator +=( const Vector3 &vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( const Vector3 &vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 &pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 &pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+inline const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Maximum element of a 3-D point (returned as a floatInVec)
+// 
+inline const floatInVec maxElem( const Point3 &pnt );
+
+// Minimum element of a 3-D point (returned as a floatInVec)
+// 
+inline const floatInVec minElem( const Point3 &pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline const floatInVec sum( const Point3 &pnt );
+
+// Apply uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 &pnt, float scaleVal );
+
+// Apply uniform scale to a 3-D point (scalar data contained in vector data type)
+// 
+inline const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal );
+
+// Apply non-uniform scale to a 3-D point (scale factors supplied as a 3-D vector)
+// 
+inline const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline const floatInVec distSqrFromOrigin( const Point3 &pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline const floatInVec distFromOrigin( const Point3 &pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 );
+
+// Linear interpolation between two 3-D points (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a vector (SIMD) register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 );
+
+// Conditionally select between two 3-D points (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 );
+
+// Store x, y, and z elements of 3-D point in first three words of a quadword (*quad), preserving fourth word
+// 
+inline void storeXYZ( const Point3 &pnt, __m128 * quad );
+
+// Load four three-float 3-D points, stored in three quadwords (read through threeQuads)
+// 
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads );
+
+// Store four 3-D points in three quadwords (written through threeQuads)
+// 
+inline void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads );
+
+// Store eight 3-D points as half-floats (written through threeQuads as vec_ushort8 data)
+// 
+inline void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only declared and defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 &pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only declared and defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 &pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+// (all four elements live in the single __m128 data member, mVec128)
+class Quat
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: mVec128 is not written until a value is assigned)
+    inline Quat( ) { };
+
+    // Construct a quaternion from x, y, z, and w elements
+    //
+    inline Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type)
+    //
+    inline Quat( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w );
+
+    // Construct a quaternion from a 3-D vector (x, y, z) and a scalar (w)
+    //
+    inline Quat( const Vector3 &xyz, float w );
+
+    // Construct a quaternion from a 3-D vector (x, y, z) and a scalar (w) (scalar data contained in vector data type)
+    //
+    inline Quat( const Vector3 &xyz, const floatInVec &w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // (explicit: a Vector4 is never converted to a Quat implicitly)
+    explicit inline Quat( const Vector4 &vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // (explicit: a Matrix3 is never converted to a Quat implicitly)
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // (explicit: a float is never converted to a Quat implicitly)
+    explicit inline Quat( float scalar );
+
+    // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type)
+    // (explicit: a floatInVec is never converted to a Quat implicitly)
+    explicit inline Quat( const floatInVec &scalar );
+
+    // Set vector float data in a quaternion
+    // (explicit: an __m128 is never converted to a Quat implicitly)
+    explicit inline Quat( __m128 vf4 );
+
+    // Get vector float data from a quaternion
+    // (returned by value)
+    inline __m128 get128( ) const;
+
+    // Assign one quaternion to another
+    //
+    inline Quat & operator =( const Quat &quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE:
+    // This function does not change the w element.
+    //
+    inline Quat & setXYZ( const Vector3 &vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // (returned by value as a Vector3)
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    //
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    //
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    //
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    //
+    inline Quat & setW( float w );
+
+    // Set the x element of a quaternion (scalar data contained in vector data type)
+    //
+    inline Quat & setX( const floatInVec &x );
+
+    // Set the y element of a quaternion (scalar data contained in vector data type)
+    //
+    inline Quat & setY( const floatInVec &y );
+
+    // Set the z element of a quaternion (scalar data contained in vector data type)
+    //
+    inline Quat & setZ( const floatInVec &z );
+
+    // Set the w element of a quaternion (scalar data contained in vector data type)
+    //
+    inline Quat & setW( const floatInVec &w );
+
+    // Get the x element of a quaternion
+    //
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a quaternion
+    //
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a quaternion
+    //
+    inline const floatInVec getZ( ) const;
+
+    // Get the w element of a quaternion
+    //
+    inline const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // NOTE(review): idx is presumably 0..3 for x, y, z, w -- confirm in the implementation
+    inline Quat & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type)
+    //
+    inline Quat & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // NOTE(review): idx is presumably 0..3 for x, y, z, w -- confirm in the implementation
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // (returns a VecIdx object, a type declared outside this section, not a float reference)
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // (const overload; returns the element by value)
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two quaternions
+    //
+    inline const Quat operator +( const Quat &quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    //
+    inline const Quat operator -( const Quat &quat ) const;
+
+    // Multiply two quaternions
+    //
+    inline const Quat operator *( const Quat &quat ) const;
+
+    // Multiply a quaternion by a scalar
+    //
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    //
+    inline const Quat operator /( float scalar ) const;
+
+    // Multiply a quaternion by a scalar (scalar data contained in vector data type)
+    //
+    inline const Quat operator *( const floatInVec &scalar ) const;
+
+    // Divide a quaternion by a scalar (scalar data contained in vector data type)
+    //
+    inline const Quat operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    //
+    inline Quat & operator +=( const Quat &quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    //
+    inline Quat & operator -=( const Quat &quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    //
+    inline Quat & operator *=( const Quat &quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    //
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    //
+    inline Quat & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    //
+    inline Quat & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    //
+    inline Quat & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a quaternion
+    // (unary minus; returns a new quaternion)
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // (static: invoked as Quat::identity( ))
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE:
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    //
+    static inline const Quat rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // (the angle is given in radians)
+    static inline const Quat rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    //
+    static inline const Quat rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    //
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    //
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    //
+    static inline const Quat rotationZ( float radians );
+
+    // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type)
+    //
+    static inline const Quat rotationX( const floatInVec &radians );
+
+    // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type)
+    //
+    static inline const Quat rotationY( const floatInVec &radians );
+
+    // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type)
+    //
+    static inline const Quat rotationZ( const floatInVec &radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// 
+inline const Quat operator *( float scalar, const Quat &quat );
+
+// Multiply a quaternion by a scalar (scalar data contained in vector data type)
+// 
+inline const Quat operator *( const floatInVec &scalar, const Quat &quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( const Quat &quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+inline const Vector3 rotate( const Quat &unitQuat, const Vector3 &vec );
+
+// Compute the dot product of two quaternions
+// 
+inline const floatInVec dot( const Quat &quat0, const Quat &quat1 );
+
+// Compute the norm of a quaternion
+// 
+inline const floatInVec norm( const Quat &quat );
+
+// Compute the length of a quaternion
+// 
+inline const floatInVec length( const Quat &quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( const Quat &quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( float t, const Quat &quat0, const Quat &quat1 );
+
+// Linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 );
+
+// Spherical linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+inline const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 );
+
+// Spherical quadrangle interpolation (scalar data contained in vector data type)
+// 
+inline const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a SIMD register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Quat select( const Quat &quat0, const Quat &quat1, bool select1 );
+
+// Conditionally select between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat &quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat &quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// (stored as three Vector3 columns: mCol0, mCol1, mCol2)
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: nothing is assigned to the columns here)
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    //
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    //
+    inline Matrix3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // (explicit: a Quat is never converted to a Matrix3 implicitly)
+    explicit inline Matrix3( const Quat &unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // (explicit: a float is never converted to a Matrix3 implicitly)
+    explicit inline Matrix3( float scalar );
+
+    // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type)
+    // (explicit: a floatInVec is never converted to a Matrix3 implicitly)
+    explicit inline Matrix3( const floatInVec &scalar );
+
+    // Assign one 3x3 matrix to another
+    //
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    //
+    inline Matrix3 & setCol0( const Vector3 &col0 );
+
+    // Set column 1 of a 3x3 matrix
+    //
+    inline Matrix3 & setCol1( const Vector3 &col1 );
+
+    // Set column 2 of a 3x3 matrix
+    //
+    inline Matrix3 & setCol2( const Vector3 &col2 );
+
+    // Get column 0 of a 3x3 matrix
+    //
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    //
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    //
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // NOTE(review): col is presumably 0..2 -- confirm in the implementation
+    inline Matrix3 & setCol( int col, const Vector3 &vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // NOTE(review): row is presumably 0..2 -- confirm in the implementation
+    inline Matrix3 & setRow( int row, const Vector3 &vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    //
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    //
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // (returns Vector3 &, so the result can be assigned to)
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // (const overload; returns the column by value)
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // (arguments are column index first, then row index)
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // (arguments are column index first, then row index)
+    inline Matrix3 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // (arguments are column index first, then row index)
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    //
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    //
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // (unary minus; returns a new matrix)
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    //
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+    //
+    inline const Matrix3 operator *( const floatInVec &scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // (the matrix is the left operand; the result is a 3-D vector)
+    inline const Vector3 operator *( const Vector3 &vec ) const;
+
+    // Multiply two 3x3 matrices
+    // (this matrix is the left operand, mat the right operand)
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    //
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    //
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    //
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    //
+    inline Matrix3 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    //
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // (static: invoked as Matrix3::identity( ))
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    //
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    //
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    //
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix3 rotationX( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix3 rotationY( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix3 rotationZ( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, z angles in its x, y, z elements -- confirm
+    static inline const Matrix3 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // (the angle is given in radians)
+    static inline const Matrix3 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    //
+    static inline const Matrix3 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    //
+    static inline const Matrix3 rotation( const Quat &unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // (scale factors are supplied as a 3-D vector)
+    static inline const Matrix3 scale( const Vector3 &scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// 
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+// 
+inline const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// 
+inline const floatInVec determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a SIMD register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+// Conditionally select between two 3x3 matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// (stored as four Vector4 columns: mCol0, mCol1, mCol2, mCol3)
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: nothing is assigned to the columns here)
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    //
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    //
+    inline Matrix4( const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // (explicit: a Transform3 is never converted to a Matrix4 implicitly)
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    //
+    inline Matrix4( const Matrix3 & mat, const Vector3 &translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    //
+    inline Matrix4( const Quat &unitQuat, const Vector3 &translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // (explicit: a float is never converted to a Matrix4 implicitly)
+    explicit inline Matrix4( float scalar );
+
+    // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type)
+    // (explicit: a floatInVec is never converted to a Matrix4 implicitly)
+    explicit inline Matrix4( const floatInVec &scalar );
+
+    // Assign one 4x4 matrix to another
+    //
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE:
+    // This function does not change the bottom row elements.
+    //
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    //
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE:
+    // This function does not change the bottom row elements.
+    //
+    inline Matrix4 & setTranslation( const Vector3 &translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // (returned by value as a Vector3)
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol0( const Vector4 &col0 );
+
+    // Set column 1 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol1( const Vector4 &col1 );
+
+    // Set column 2 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol2( const Vector4 &col2 );
+
+    // Set column 3 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol3( const Vector4 &col3 );
+
+    // Get column 0 of a 4x4 matrix
+    //
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    //
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    //
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    //
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // NOTE(review): col is presumably 0..3 -- confirm in the implementation
+    inline Matrix4 & setCol( int col, const Vector4 &vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // NOTE(review): row is presumably 0..3 -- confirm in the implementation
+    inline Matrix4 & setRow( int row, const Vector4 &vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    //
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    //
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // (returns Vector4 &, so the result can be assigned to)
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // (const overload; returns the column by value)
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // (arguments are column index first, then row index)
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // (arguments are column index first, then row index)
+    inline Matrix4 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // (arguments are column index first, then row index)
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    //
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    //
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // (unary minus; returns a new matrix)
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    //
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+    //
+    inline const Matrix4 operator *( const floatInVec &scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    //
+    inline const Vector4 operator *( const Vector4 &vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // (the result is a 4-D vector)
+    inline const Vector4 operator *( const Vector3 &vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // (the result is a 4-D vector)
+    inline const Vector4 operator *( const Point3 &pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // (this matrix is the left operand, mat the right operand)
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // (the result is a 4x4 matrix)
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    //
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    //
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    //
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    //
+    inline Matrix4 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    //
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    //
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // (static: invoked as Matrix4::identity( ))
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    //
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    //
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    //
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix4 rotationX( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix4 rotationY( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix4 rotationZ( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, z angles in its x, y, z elements -- confirm
+    static inline const Matrix4 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // (the angle is given in radians)
+    static inline const Matrix4 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    //
+    static inline const Matrix4 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    //
+    static inline const Matrix4 rotation( const Quat &unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // (scale factors are supplied as a 3-D vector)
+    static inline const Matrix4 scale( const Vector3 &scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    //
+    static inline const Matrix4 translation( const Vector3 &translateVec );
+
+    // Construct viewing matrix based on eye, position looked at, and up direction
+    // (the two positions are Point3 values; the up direction is a Vector3)
+    static inline const Matrix4 lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec );
+
+    // Construct a perspective projection matrix
+    // (the field-of-view argument, fovyRadians, is expressed in radians)
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    //
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    //
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// 
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+// 
+inline const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// The result is unpredictable when the determinant of mat is equal to or near 0.
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// 
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// 
+inline const floatInVec determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a SIMD register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+// Conditionally select between two 4x4 matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+// Stored as four private Vector3 columns, mCol0 through mCol3 (presumably a 3x3 part plus a translation column; confirm in mat_aos.h).
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so the columns hold whatever Vector3's own default constructor leaves in them.
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // One Vector3 is supplied per column, in the order col0, col1, col2, col3.
+    inline Transform3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2, const Vector3 &col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // NOTE(review): presumably tfrm fills columns 0-2 and translateVec fills column 3; confirm in mat_aos.h.
+    inline Transform3( const Matrix3 & tfrm, const Vector3 &translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // NOTE(review): the result for a quaternion that is not unit length is not specified by this declaration.
+    inline Transform3( const Quat &unitQuat, const Vector3 &translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // Declared explicit, so a float is never implicitly converted to a Transform3.
+    explicit inline Transform3( float scalar );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is never implicitly converted to a Transform3.
+    explicit inline Transform3( const floatInVec &scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // Returns Transform3 & (presumably *this; confirm in mat_aos.h).
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // Like the other set* members, this returns Transform3 & (presumably *this, which would allow chained calls; confirm in mat_aos.h).
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // The submatrix is returned by value as a const Matrix3.
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( const Vector3 &translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // The translation is returned by value as a const Vector3.
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( const Vector3 &col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( const Vector3 &col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( const Vector3 &col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( const Vector3 &col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // NOTE(review): no range check on col is visible here; valid indices are presumably 0-3.
+    inline Transform3 & setCol( int col, const Vector3 &vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // A row has one element per column, so it is passed as a Vector4. NOTE(review): row is presumably 0-2.
+    inline Transform3 & setRow( int row, const Vector4 &vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // NOTE(review): no range check on col is visible here; valid indices are presumably 0-3.
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // A row has one element per column, so it is returned as a Vector4. NOTE(review): row is presumably 0-2.
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a non-const reference, so the selected column can be modified through the result.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value instead of by reference.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Note the argument order: column index first, then row index.
+    inline Transform3 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // The element is returned as a floatInVec rather than a plain float.
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // NOTE(review): presumably translation is not applied to a Vector3 (contrast with the Point3 overload); confirm in mat_aos.h.
+    inline const Vector3 operator *( const Vector3 &vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // NOTE(review): presumably translation is applied to a Point3 (contrast with the Vector3 overload); confirm in mat_aos.h.
+    inline const Point3 operator *( const Point3 &pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // Const member: the product is returned by value and this matrix is left unchanged.
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // Returns Transform3 & (presumably *this; confirm in mat_aos.h).
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // Like every static member below, this is a factory that returns a new matrix by value.
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationX( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationY( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationZ( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably packs the x, y and z angles in that order, and the ZYX in the name presumably gives the composition order; confirm in mat_aos.h.
+    static inline const Transform3 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // Despite the wording above, the declared return type is Transform3, the same as the other rotation factories.
+    static inline const Transform3 rotation( const Quat &unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // NOTE(review): scaleVec presumably holds the per-axis scale factors; confirm in mat_aos.h.
+    static inline const Transform3 scale( const Vector3 &scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( const Vector3 &translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// The result is returned by value; tfrm and scaleVec are taken by const reference.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Argument order is the reverse of appendScale: the scale vector comes first, then the matrix.
+inline const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// The result is returned by value; both inputs are taken by const reference.
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// The result is returned by value; tfrm is taken by const reference.
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// No error is reported through this interface: the only output is the returned matrix.
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// NOTE(review): the result for a matrix that does not meet that restriction is not specified by this declaration.
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): by its name, select1 presumably picks tfrm1 when true and tfrm0 when false; confirm in mat_aos.h.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// Same parameters as the bool overload, except that select1 is passed as a boolInVec.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// NOTE(review): the output destination and format are not visible in this declaration.
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// name is passed as a const char * (presumably a NUL-terminated label; confirm in mat_aos.h).
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos.h
index f6443a587..13f482150 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos.h
@@ -1,1833 +1,1833 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_C_H
-#define _VECTORMATH_MAT_AOS_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
-#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
-#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
-#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( &result->col0, &mat->col0 );
-    vmathV3Copy( &result->col1, &mat->col1 );
-    vmathV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
-    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
-    xyzw_2 = vec_add( unitQuat->vec128, unitQuat->vec128 );
-    wwww = vec_splat( unitQuat->vec128, 3 );
-    yzxw = vec_perm( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_PERM_YZXW );
-    zxyw = vec_perm( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_PERM_ZXYW );
-    yzxw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_YZXW );
-    zxyw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_ZXYW );
-    tmp0 = vec_madd( yzxw_2, wwww, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_nmsub( yzxw, yzxw_2, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    tmp2 = vec_madd( yzxw, xyzw_2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp0 = vec_madd( zxyw, xyzw_2, tmp0 );
-    tmp1 = vec_nmsub( zxyw, zxyw_2, tmp1 );
-    tmp2 = vec_nmsub( zxyw_2, wwww, tmp2 );
-    tmp3 = vec_sel( tmp0, tmp1, select_x );
-    tmp4 = vec_sel( tmp1, tmp2, select_x );
-    tmp5 = vec_sel( tmp2, tmp0, select_x );
-    result->col0.vec128 = vec_sel( tmp3, tmp2, select_z );
-    result->col1.vec128 = vec_sel( tmp4, tmp0, select_z );
-    result->col2.vec128 = vec_sel( tmp5, tmp1, select_z );
-}
-
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, mat, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
-{
-    vmathV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
-{
-    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, res0, res1, res2;
-    tmp0 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 );
-    tmp1 = vec_mergel( mat->col0.vec128, mat->col2.vec128 );
-    res0 = vec_mergeh( tmp0, mat->col1.vec128 );
-    res1 = vec_perm( tmp0, mat->col1.vec128, _VECTORMATH_PERM_ZBWX );
-    res2 = vec_perm( tmp1, mat->col1.vec128, _VECTORMATH_PERM_XCYX );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp2 = _vmathVfCross( mat->col0.vec128, mat->col1.vec128 );
-    tmp0 = _vmathVfCross( mat->col1.vec128, mat->col2.vec128 );
-    tmp1 = _vmathVfCross( mat->col2.vec128, mat->col0.vec128 );
-    dot = _vmathVfDot3( tmp2, mat->col2.vec128 );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-    inv0 = vec_madd( inv0, invdet, zero );
-    inv1 = vec_madd( inv1, invdet, zero );
-    inv2 = vec_madd( inv2, invdet, zero );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-}
-
-static inline float vmathM3Determinant( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Neg( &result->col0, &mat->col0 );
-    vmathV3Neg( &result->col1, &mat->col1 );
-    vmathV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec->vec128, 0 );
-    yyyy = vec_splat( vec->vec128, 1 );
-    zzzz = vec_splat( vec->vec128, 2 );
-    res = vec_madd( mat->col0.vec128, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mat->col1.vec128, yyyy, res );
-    res = vec_madd( mat->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    VmathMatrix3 tmpResult;
-    vmathM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM3Copy( result, &tmpResult );
-}
-
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    vmathV3MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV3MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
-{
-    VmathVector4 tmpV4_0;
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );
-    angles = tmpV4_0.vec128;
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    result->col0.vec128 = vec_madd( Z0, Y0, zero );
-    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );
-    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );
-}
-
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    axis = unitVec->vec128;
-    sincosf4( (vec_float4){radians,radians,radians,radians}, &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    axisS = vec_madd( axis, s, zero );
-    negAxisS = negatef4( axisS );
-    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
-    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
-    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
-    result->col0.vec128 = vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 );
-    result->col1.vec128 = vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 );
-    result->col2.vec128 = vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 );
-}
-
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vmathM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    result->col0.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x00F0 );
-}
-
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-}
-
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
-{
-    vmathV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathM3GetRow( &tmpV3_0, mat, 0 );
-    vmathV3Print( &tmpV3_0 );
-    vmathM3GetRow( &tmpV3_1, mat, 1 );
-    vmathV3Print( &tmpV3_1 );
-    vmathM3GetRow( &tmpV3_2, mat, 2 );
-    vmathV3Print( &tmpV3_2 );
-}
-
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM3Print( mat );
-}
-
-#endif
-
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( &result->col0, &mat->col0 );
-    vmathV4Copy( &result->col1, &mat->col1 );
-    vmathV4Copy( &result->col2, &mat->col2 );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
-{
-    vmathV4MakeFromScalar( &result->col0, scalar );
-    vmathV4MakeFromScalar( &result->col1, scalar );
-    vmathV4MakeFromScalar( &result->col2, scalar );
-    vmathV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, 1.0f );
-}
-
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-    vmathV4Copy( &result->col1, _col1 );
-    vmathV4Copy( &result->col2, _col2 );
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 mat;
-    vmathM3MakeFromQ( &mat, unitQuat );
-    vmathV4MakeFromV3Scalar( &result->col0, &mat.col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat.col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat.col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
-{
-    vmathV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
-{
-    vmathV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
-{
-    vmathV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
-{
-    vmathV4SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV4SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV4SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV4SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
-{
-    VmathVector4 tmpV3_0;
-    vmathM4GetCol( &tmpV3_0, result, col );
-    vmathV4SetElem( &tmpV3_0, row, val );
-    vmathM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
-{
-    VmathVector4 tmpV4_0;
-    vmathM4GetCol( &tmpV4_0, mat, col );
-    return vmathV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
-{
-    vmathV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
-{
-    vmathV4MakeFromElems( result, vmathV4GetElem( &mat->col0, row ), vmathV4GetElem( &mat->col1, row ), vmathV4GetElem( &mat->col2, row ), vmathV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 );
-    tmp1 = vec_mergeh( mat->col1.vec128, mat->col3.vec128 );
-    tmp2 = vec_mergel( mat->col0.vec128, mat->col2.vec128 );
-    tmp3 = vec_mergel( mat->col1.vec128, mat->col3.vec128 );
-    res0 = vec_mergeh( tmp0, tmp1 );
-    res1 = vec_mergel( tmp0, tmp1 );
-    res2 = vec_mergeh( tmp2, tmp3 );
-    res3 = vec_mergel( tmp2, tmp3 );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    result->col3.vec128 = res3;
-}
-
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vector float in0, in1, in2, in3;
-    vector float tmp0, tmp1, tmp2, tmp3;
-    vector float cof0, cof1, cof2, cof3;
-    vector float t0, t1, t2, t3;
-    vector float t01, t02, t03, t12, t23;
-    vector float t1r, t2r;
-    vector float t01r, t02r, t03r, t12r, t23r;
-    vector float t1r3, t1r3r;
-    vector float det, det0, det1, det2, det3, invdet;
-    vector float vzero = (vector float){0.0};
-    in0 = mat->col0.vec128;
-    in1 = mat->col1.vec128;
-    in2 = mat->col2.vec128;
-    in3 = mat->col3.vec128;
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
-    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
-    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
-    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
-    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
-    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
-    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
-    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
-    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
-    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
-    cof1 = vec_nmsub(t0, t23, vzero);		/* -(AGP ECL IOH MKD) */
-    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
-    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
-    cof1 = vec_madd(t0, t23r, cof1);		/* AOH EKD IGP MCL + cof1 */
-    cof1 = vec_sld(cof1, cof1, 8);		/* IGP MCL AOH EKD - IOH MKD AGP ECL */
-    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
-    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
-    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
-    cof3 = vec_madd(t0, t12, vzero);		/* ANG EJC IFO MBK */
-    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
-    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
-    cof3 = vec_nmsub(t0, t12r, cof3);		/* cof3 - AFO EBK ING MJC */
-    cof3 = vec_sld(cof3, cof3, 8);		/* ING MJC AFO EBK - IFO MBK ANG EJC */
-    t1r = vec_sld(t1, t1, 8);			/* B F J N */
-    t2r = vec_sld(t2, t2, 8);			/* K O C G */
-    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
-    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
-    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
-    cof2 = vec_madd(t0, t1r3, vzero);		/* AFP EBL INH MJD */
-    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
-    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
-    cof2 = vec_nmsub(t0, t1r3r, cof2);		/* cof2 - ANH EJD IFP MBL */
-    cof2 = vec_sld(cof2, cof2, 8);		/* IFP MBL ANH EJD - INH MJD AFP EBL */
-    t01 = vec_madd(t0, t1, vzero);		/* AJ EN IB MF */
-    t01 = vec_perm(t01, t01, _VECTORMATH_PERM_YXWZ);	/* EN AJ MF IB */
-    cof2 = vec_nmsub(t3, t01, cof2);		/* cof2 - LEN PAJ DMF HIB */
-    cof3 = vec_madd(t2r, t01, cof3);		/* KEN OAJ CMF GIB + cof3 */ 
-    t01r = vec_sld(t01, t01, 8);			/* MF IB EN AJ */
-    cof2 = vec_madd(t3, t01r, cof2);		/* LMF PIB DEN HAJ + cof2 */
-    cof3 = vec_nmsub(t2r, t01r, cof3);		/* cof3 - KMF OIB CEN GAJ */
-    t03 = vec_madd(t0, t3, vzero);		/* AL EP ID MH */
-    t03 = vec_perm(t03, t03, _VECTORMATH_PERM_YXWZ);	/* EP AL MH ID */
-    cof1 = vec_nmsub(t2r, t03, cof1);		/* cof1 - KEP OAL CMH GID */
-    cof2 = vec_madd(t1, t03, cof2);		/* JEP NAL BMH FID + cof2 */
-    t03r = vec_sld(t03, t03, 8);			/* MH ID EP AL */
-    cof1 = vec_madd(t2r, t03r, cof1);		/* KMH OID CEP GAL + cof1 */
-    cof2 = vec_nmsub(t1, t03r, cof2);		/* cof2 - JMH NID BEP FAL */ 
-    t02 = vec_madd(t0, t2r, vzero);		/* AK EO IC MG */
-    t02 = vec_perm(t02, t02, _VECTORMATH_PERM_YXWZ);	/* E0 AK MG IC */
-    cof1 = vec_madd(t3, t02, cof1);		/* LEO PAK DMG HIC + cof1 */
-    cof3 = vec_nmsub(t1, t02, cof3);		/* cof3 - JEO NAK BMG FIC */
-    t02r = vec_sld(t02, t02, 8);			/* MG IC EO AK */
-    cof1 = vec_nmsub(t3, t02r, cof1);		/* cof1 - LMG PIC DEO HAK */
-    cof3 = vec_madd(t1, t02r, cof3);		/* JMG NIC BEO FAK + cof3 */
-    /* Compute the determinant of the matrix 
-     *
-     * det = sum_across(t0 * cof0);
-     *
-     * We perform a sum across the entire vector so that 
-     * we don't have to splat the result when multiplying the
-     * cofactors by the inverse of the determinant.
-     */
-    det  = vec_madd(t0, cof0, vzero);
-    det0 = vec_splat(det, 0);
-    det1 = vec_splat(det, 1);
-    det2 = vec_splat(det, 2);
-    det3 = vec_splat(det, 3);
-    det  = vec_add(det0, det1);
-    det2 = vec_add(det2, det3);
-    det  = vec_add(det, det2);
-    /* Compute the reciprocal of the determinant.
-     */
-    invdet = recipf4(det);
-    /* Multiply the cofactors by the reciprocal of the determinant.
-     */ 
-    result->col0.vec128 = vec_madd(cof0, invdet, vzero);
-    result->col1.vec128 = vec_madd(cof1, invdet, vzero);
-    result->col2.vec128 = vec_madd(cof2, invdet, vzero);
-    result->col3.vec128 = vec_madd(cof3, invdet, vzero);
-}
-
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3Inverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline float vmathM4Determinant( const VmathMatrix4 *mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vector float in0, in1, in2, in3;
-    vector float tmp0, tmp1, tmp2, tmp3;
-    vector float cof0;
-    vector float t0, t1, t2, t3;
-    vector float t12, t23;
-    vector float t1r, t2r;
-    vector float t12r, t23r;
-    vector float t1r3, t1r3r;
-    vector float vzero = (vector float){0.0};
-    union { vec_float4 v; float s[4]; } tmp;
-    in0 = mat->col0.vec128;
-    in1 = mat->col1.vec128;
-    in2 = mat->col2.vec128;
-    in3 = mat->col3.vec128;
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
-    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
-    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
-    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
-    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
-    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
-    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
-    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
-    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
-    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
-    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
-    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
-    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
-    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
-    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
-    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
-    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
-    t1r = vec_sld(t1, t1, 8);			/* B F J N */
-    t2r = vec_sld(t2, t2, 8);			/* K O C G */
-    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
-    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
-    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
-    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
-    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
-    tmp.v = _vmathVfDot4(t0,cof0);
-    return tmp.s[0];
-}
-
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Neg( &result->col0, &mat->col0 );
-    vmathV4Neg( &result->col1, &mat->col1 );
-    vmathV4Neg( &result->col2, &mat->col2 );
-    vmathV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz, wwww;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( vec->vec128, 0 );
-    yyyy = vec_splat( vec->vec128, 1 );
-    zzzz = vec_splat( vec->vec128, 2 );
-    wwww = vec_splat( vec->vec128, 3 );
-    tmp0 = vec_madd( mat->col0.vec128, xxxx, zero );
-    tmp1 = vec_madd( mat->col1.vec128, yyyy, zero );
-    tmp0 = vec_madd( mat->col2.vec128, zzzz, tmp0 );
-    tmp1 = vec_madd( mat->col3.vec128, wwww, tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec->vec128, 0 );
-    yyyy = vec_splat( vec->vec128, 1 );
-    zzzz = vec_splat( vec->vec128, 2 );
-    res = vec_madd( mat->col0.vec128, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mat->col1.vec128, yyyy, res );
-    res = vec_madd( mat->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( pnt->vec128, 0 );
-    yyyy = vec_splat( pnt->vec128, 1 );
-    zzzz = vec_splat( pnt->vec128, 2 );
-    tmp0 = vec_madd( mat->col0.vec128, xxxx, zero );
-    tmp1 = vec_madd( mat->col1.vec128, yyyy, zero );
-    tmp0 = vec_madd( mat->col2.vec128, zzzz, tmp0 );
-    tmp1 = vec_add( mat->col3.vec128, tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    VmathMatrix4 tmpResult;
-    vmathM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
-{
-    VmathMatrix4 tmpResult;
-    VmathPoint3 tmpP3_0;
-    vmathM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
-{
-    vmathV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( &result->col0, &mat->col0 );
-    vmathV4GetXYZ( &result->col1, &mat->col1 );
-    vmathV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    vmathV4MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV4MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
-{
-    VmathVector4 tmpV4_0;
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );
-    angles = tmpV4_0.vec128;
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    result->col0.vec128 = vec_madd( Z0, Y0, zero );
-    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );
-    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    axis = unitVec->vec128;
-    sincosf4( (vec_float4){radians,radians,radians,radians}, &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    axisS = vec_madd( axis, s, zero );
-    negAxisS = negatef4( axisS );
-    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
-    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
-    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
-    zeroW = (vec_float4)_VECTORMATH_MASK_0x000F;
-    axis = vec_andc( axis, zeroW );
-    tmp0 = vec_andc( tmp0, zeroW );
-    tmp1 = vec_andc( tmp1, zeroW );
-    tmp2 = vec_andc( tmp2, zeroW );
-    result->col0.vec128 = vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 );
-    result->col1.vec128 = vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 );
-    result->col2.vec128 = vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
-{
-    VmathTransform3 tmpT3_0;
-    vmathT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    result->col0.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x00F0 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV4ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV4ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
-{
-    VmathVector4 scale4;
-    vmathV4MakeFromV3Scalar( &scale4, scaleVec, 1.0f );
-    vmathV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
-{
-    VmathMatrix4 m4EyeFrame;
-    VmathVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathV3Normalize( &v3Y, upVec );
-    vmathP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathV3Normalize( &v3Z, &tmpV3_0 );
-    vmathV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathV3Normalize( &v3X, &tmpV3_1 );
-    vmathV3Cross( &v3Y, &v3Z, &v3X );
-    vmathV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    vec_float4 zero, col0, col1, col2, col3;
-    union { vec_float4 v; float s[4]; } tmp;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp.v = zero;
-    tmp.s[0] = f / aspect;
-    col0 = tmp.v;
-    tmp.v = zero;
-    tmp.s[1] = f;
-    col1 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = ( zNear + zFar ) * rangeInv;
-    tmp.s[3] = -1.0f;
-    col2 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
-    col3 = tmp.v;
-    result->col0.vec128 = col0;
-    result->col1.vec128 = col1;
-    result->col2.vec128 = col2;
-    result->col3.vec128 = col3;
-}
-
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff;
-    vec_float4 diagonal, column, near2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = vec_splat( n.v, 0 );
-    near2 = vec_add( near2, near2 );
-    diagonal = vec_madd( near2, inv_diff, zero );
-    column = vec_madd( sum, inv_diff, zero );
-    result->col0.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( column, ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}), _VECTORMATH_MASK_0x000F );
-    result->col3.vec128 = vec_sel( zero, vec_madd( diagonal, vec_splat( f.v, 0 ), zero ), _VECTORMATH_MASK_0x00F0 );
-}
-
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff, neg_inv_diff;
-    vec_float4 diagonal, column;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = vec_add( inv_diff, inv_diff );
-    column = vec_madd( sum, vec_sel( neg_inv_diff, inv_diff, _VECTORMATH_MASK_0x00F0 ), zero );
-    result->col0.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x00F0 );
-    result->col3.vec128 = vec_sel( column, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), _VECTORMATH_MASK_0x000F );
-}
-
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
-{
-    vmathV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print( const VmathMatrix4 *mat )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathM4GetRow( &tmpV4_0, mat, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathM4GetRow( &tmpV4_1, mat, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathM4GetRow( &tmpV4_2, mat, 2 );
-    vmathV4Print( &tmpV4_2 );
-    vmathM4GetRow( &tmpV4_3, mat, 3 );
-    vmathV4Print( &tmpV4_3 );
-}
-
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM4Print( mat );
-}
-
-#endif
-
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-    vmathV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
-{
-    vmathT3SetUpper3x3( result, tfrm );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 tmpM3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathT3SetUpper3x3( result, &tmpM3_0 );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV3SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
-{
-    vmathV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
-{
-    vmathV4MakeFromElems( result, vmathV3GetElem( &tfrm->col0, row ), vmathV3GetElem( &tfrm->col1, row ), vmathV3GetElem( &tfrm->col2, row ), vmathV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp2 = _vmathVfCross( tfrm->col0.vec128, tfrm->col1.vec128 );
-    tmp0 = _vmathVfCross( tfrm->col1.vec128, tfrm->col2.vec128 );
-    tmp1 = _vmathVfCross( tfrm->col2.vec128, tfrm->col0.vec128 );
-    inv3 = negatef4( tfrm->col3.vec128 );
-    dot = _vmathVfDot3( tmp2, tfrm->col2.vec128 );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    xxxx = vec_splat( inv3, 0 );
-    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_madd( inv0, xxxx, zero );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    inv0 = vec_madd( inv0, invdet, zero );
-    inv1 = vec_madd( inv1, invdet, zero );
-    inv2 = vec_madd( inv2, invdet, zero );
-    inv3 = vec_madd( inv3, invdet, zero );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-    result->col3.vec128 = inv3;
-}
-
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1;
-    vec_float4 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( tfrm->col0.vec128, tfrm->col2.vec128 );
-    tmp1 = vec_mergel( tfrm->col0.vec128, tfrm->col2.vec128 );
-    inv3 = negatef4( tfrm->col3.vec128 );
-    inv0 = vec_mergeh( tmp0, tfrm->col1.vec128 );
-    xxxx = vec_splat( inv3, 0 );
-    inv1 = vec_perm( tmp0, tfrm->col1.vec128, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp1, tfrm->col1.vec128, _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_madd( inv0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-    result->col3.vec128 = inv3;
-}
-
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec->vec128, 0 );
-    yyyy = vec_splat( vec->vec128, 1 );
-    zzzz = vec_splat( vec->vec128, 2 );
-    res = vec_madd( tfrm->col0.vec128, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( tfrm->col1.vec128, yyyy, res );
-    res = vec_madd( tfrm->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( pnt->vec128, 0 );
-    yyyy = vec_splat( pnt->vec128, 1 );
-    zzzz = vec_splat( pnt->vec128, 2 );
-    tmp0 = vec_madd( tfrm->col0.vec128, xxxx, zero );
-    tmp1 = vec_madd( tfrm->col1.vec128, yyyy, zero );
-    tmp0 = vec_madd( tfrm->col2.vec128, zzzz, tmp0 );
-    tmp1 = vec_add( tfrm->col3.vec128, tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    VmathTransform3 tmpResult;
-    VmathPoint3 tmpP3_0, tmpP3_1;
-    vmathT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathT3Copy( result, &tmpResult );
-}
-
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathT3MakeIdentity( VmathTransform3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
-{
-    vmathM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    vmathV3MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV3MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
-{
-    VmathVector4 tmpV4_0;
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );
-    angles = tmpV4_0.vec128;
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    result->col0.vec128 = vec_madd( Z0, Y0, zero );
-    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );
-    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    result->col0.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x00F0 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &tfrm->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &tfrm->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &tfrm->col2, vmathV3GetZ( scaleVec ) );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print( const VmathTransform3 *tfrm )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;
-    vmathT3GetRow( &tmpV4_0, tfrm, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathT3GetRow( &tmpV4_1, tfrm, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathT3GetRow( &tmpV4_2, tfrm, 2 );
-    vmathV4Print( &tmpV4_2 );
-}
-
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
-{
-    vec_float4 res;
-    vec_float4 col0, col1, col2;
-    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
-    vec_float4 radicand, invSqrt, scale;
-    vec_float4 res0, res1, res2, res3;
-    vec_float4 xx, yy, zz;
-    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
-    vec_uint4 select_y = _VECTORMATH_MASK_0x0F00;
-    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
-    vec_uint4 select_w = _VECTORMATH_MASK_0x000F;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-
-    col0 = tfrm->col0.vec128;
-    col1 = tfrm->col1.vec128;
-    col2 = tfrm->col2.vec128;
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = vec_sel( col0, col1, select_y );
-    xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
-    yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
-    zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
-
-    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = vec_sel( col0, col1, select_z );
-    zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
-    yz_zx_xy = vec_sel( col0, col1, select_x );
-    yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
-
-    sum = vec_add( zy_xz_yx, yz_zx_xy );
-    diff = vec_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = vec_madd( invSqrt, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero );
-    res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
-    res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
-    res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
-    res3 = diff;
-    res0 = vec_sel( res0, radicand, select_x );
-    res1 = vec_sel( res1, radicand, select_y );
-    res2 = vec_sel( res2, radicand, select_z );
-    res3 = vec_sel( res3, radicand, select_w );
-    res0 = vec_madd( res0, vec_splat( scale, 0 ), zero );
-    res1 = vec_madd( res1, vec_splat( scale, 1 ), zero );
-    res2 = vec_madd( res2, vec_splat( scale, 2 ), zero );
-    res3 = vec_madd( res3, vec_splat( scale, 3 ), zero );
-
-    /* determine case and select answer */
-
-    xx = vec_splat( col0, 0 );
-    yy = vec_splat( col1, 1 );
-    zz = vec_splat( col2, 2 );
-    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
-    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
-    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), zero ) );
-    result->vec128 = res;
-}
-
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
-{
-    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
-    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
-    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
-{
-    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
-    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
-    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
-    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
-}
-
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 );
-    tmp1 = vec_mergel( mat->col0.vec128, mat->col2.vec128 );
-    xxxx = vec_splat( vec->vec128, 0 );
-    mcol0 = vec_mergeh( tmp0, mat->col1.vec128 );
-    mcol1 = vec_perm( tmp0, mat->col1.vec128, _VECTORMATH_PERM_ZBWX );
-    mcol2 = vec_perm( tmp1, mat->col1.vec128, _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( vec->vec128, 1 );
-    res = vec_madd( mcol0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zzzz = vec_splat( vec->vec128, 2 );
-    res = vec_madd( mcol1, yyyy, res );
-    res = vec_madd( mcol2, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
-{
-    vec_float4 neg, res0, res1, res2;
-    neg = negatef4( vec->vec128 );
-    res0 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_XZBX );
-    res1 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_CXXX );
-    res2 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_YAXX );
-    res0 = vec_andc( res0, (vec_float4)_VECTORMATH_MASK_0xF000 );
-    res1 = vec_andc( res1, (vec_float4)_VECTORMATH_MASK_0x0F00 );
-    res2 = vec_andc( res2, (vec_float4)_VECTORMATH_MASK_0x00F0 );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_C_H
+#define _VECTORMATH_MAT_AOS_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words of the first operand are labeled [x,y,z,w] and words of the second operand [a,b,c,d]
+ */
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f /* pi/2 as a single-precision literal */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{   /* Copy mat into result, one column at a time. */
+    vmathV3Copy( &result->col0, &mat->col0 );
+    vmathV3Copy( &result->col1, &mat->col1 );
+    vmathV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
+{   /* Initialize each of the three columns from scalar via vmathV3MakeFromScalar. */
+    vmathV3MakeFromScalar( &result->col0, scalar );
+    vmathV3MakeFromScalar( &result->col1, scalar );
+    vmathV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{   /* Build the 3x3 matrix for quaternion unitQuat = (x,y,z,w); lane notes below list words 0..2 only. */
+    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
+    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
+    xyzw_2 = vec_add( unitQuat->vec128, unitQuat->vec128 ); /* ( 2x, 2y, 2z, 2w ) */
+    wwww = vec_splat( unitQuat->vec128, 3 ); /* w in every word */
+    yzxw = vec_perm( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_PERM_YZXW );
+    zxyw = vec_perm( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_PERM_ZXYW );
+    yzxw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_YZXW );
+    zxyw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_ZXYW );
+    tmp0 = vec_madd( yzxw_2, wwww, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* ( 2yw, 2zw, 2xw ) */
+    tmp1 = vec_nmsub( yzxw, yzxw_2, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); /* ( 1-2y^2, 1-2z^2, 1-2x^2 ) */
+    tmp2 = vec_madd( yzxw, xyzw_2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* ( 2xy, 2yz, 2xz ) */
+    tmp0 = vec_madd( zxyw, xyzw_2, tmp0 ); /* ( 2xz+2yw, 2xy+2zw, 2yz+2xw ) */
+    tmp1 = vec_nmsub( zxyw, zxyw_2, tmp1 ); /* ( 1-2y^2-2z^2, 1-2z^2-2x^2, 1-2x^2-2y^2 ) */
+    tmp2 = vec_nmsub( zxyw_2, wwww, tmp2 ); /* ( 2xy-2zw, 2yz-2xw, 2xz-2yw ) */
+    tmp3 = vec_sel( tmp0, tmp1, select_x ); /* x word from tmp1, rest from tmp0 */
+    tmp4 = vec_sel( tmp1, tmp2, select_x ); /* x word from tmp2, rest from tmp1 */
+    tmp5 = vec_sel( tmp2, tmp0, select_x ); /* x word from tmp0, rest from tmp2 */
+    result->col0.vec128 = vec_sel( tmp3, tmp2, select_z ); /* ( 1-2y^2-2z^2, 2xy+2zw, 2xz-2yw ) */
+    result->col1.vec128 = vec_sel( tmp4, tmp0, select_z ); /* ( 2xy-2zw, 1-2z^2-2x^2, 2yz+2xw ) */
+    result->col2.vec128 = vec_sel( tmp5, tmp1, select_z ); /* ( 2xz+2yw, 2yz-2xw, 1-2x^2-2y^2 ) */
+}
+
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
+{   /* Assemble result from three caller-supplied column vectors. */
+    vmathV3Copy( &result->col0, _col0 );
+    vmathV3Copy( &result->col1, _col1 );
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
+{   /* Overwrite column 0 of result with _col0; the other columns are not touched. */
+    vmathV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
+{   /* Overwrite column 1 of result with _col1; the other columns are not touched. */
+    vmathV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
+{   /* Overwrite column 2 of result with _col2; the other columns are not touched. */
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
+{   /* Overwrite the column at index col with vec. */
+    vmathV3Copy( (&result->col0 + col), vec ); /* indexes by pointer offset from col0; col is not range-checked */
+}
+
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
+{   /* Set one row: element i of vec is written to element `row` of column i, for i = 0..2. */
+    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
+    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
+    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
+{   /* Set a single element with a read-modify-write of its whole column. */
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, result, col ); /* fetch the column */
+    vmathV3SetElem( &tmpV3_0, row, val ); /* patch the one element */
+    vmathM3SetCol( result, col, &tmpV3_0 ); /* store the column back */
+}
+
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
+{   /* Return element `row` of column `col`, read through a temporary copy of that column. */
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, mat, col );
+    return vmathV3GetElem( &tmpV3_0, row );
+}
+
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
+{   /* Copy column 0 of mat into result. */
+    vmathV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
+{   /* Copy column 1 of mat into result. */
+    vmathV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
+{   /* Copy column 2 of mat into result. */
+    vmathV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
+{   /* Copy the column at index col into result. */
+    vmathV3Copy( result, (&mat->col0 + col) ); /* indexes by pointer offset from col0; col is not range-checked */
+}
+
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
+{   /* Gather element `row` from columns 0, 1 and 2 (in that order) into result. */
+    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
+}
+
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{   /* Transpose mat into result with merge/permute shuffles; c0,c1,c2 below are mat's columns. */
+    vec_float4 tmp0, tmp1, res0, res1, res2;
+    tmp0 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 ); /* ( c0.x, c2.x, c0.y, c2.y ) */
+    tmp1 = vec_mergel( mat->col0.vec128, mat->col2.vec128 ); /* ( c0.z, c2.z, c0.w, c2.w ) */
+    res0 = vec_mergeh( tmp0, mat->col1.vec128 ); /* ( c0.x, c1.x, c2.x, - ) */
+    res1 = vec_perm( tmp0, mat->col1.vec128, _VECTORMATH_PERM_ZBWX ); /* ( c0.y, c1.y, c2.y, - ) */
+    res2 = vec_perm( tmp1, mat->col1.vec128, _VECTORMATH_PERM_XCYX ); /* ( c0.z, c1.z, c2.z, - ) */
+    result->col0.vec128 = res0; /* all inputs were read above, so result may alias mat */
+    result->col1.vec128 = res1;
+    result->col2.vec128 = res2;
+}
+
+/*
+ * Compute the inverse of the 3x3 matrix *mat and store it in *result.
+ *
+ * The three pairwise "cross" combinations of the columns are formed, a
+ * determinant-like scalar is taken from _vmathVfDot3( col0 x col1, col2 ),
+ * the three cross vectors are shuffled with the same merge/permute sequence
+ * used by vmathM3Transpose, and each is scaled by recipf4 of that scalar.
+ *
+ * NOTE: the scalar is not tested for zero before recipf4 is applied.
+ * *result is written only after all reads of *mat have completed.
+ */
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* pairwise products of the columns (presumably 3-D cross products) */
+    tmp2 = _vmathVfCross( mat->col0.vec128, mat->col1.vec128 );
+    tmp0 = _vmathVfCross( mat->col1.vec128, mat->col2.vec128 );
+    tmp1 = _vmathVfCross( mat->col2.vec128, mat->col0.vec128 );
+    /* (col0 x col1) . col2, broadcast from lane 0 to every lane */
+    dot = _vmathVfDot3( tmp2, mat->col2.vec128 );
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );
+    /* same shuffle pattern as vmathM3Transpose, applied to tmp0/tmp1/tmp2 */
+    tmp3 = vec_mergeh( tmp0, tmp2 );
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+    /* scale by the reciprocal (multiply-add with a zero addend) */
+    inv0 = vec_madd( inv0, invdet, zero );
+    inv1 = vec_madd( inv1, invdet, zero );
+    inv2 = vec_madd( inv2, invdet, zero );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+}
+
+/*
+ * Return vmathV3Dot( col2, vmathV3Cross( col0, col1 ) ) for the columns of
+ * *mat, i.e. the scalar triple product used as the 3x3 determinant.
+ */
+static inline float vmathM3Determinant( const VmathMatrix3 *mat )
+{
+    VmathVector3 cross01;
+    float det;
+    vmathV3Cross( &cross01, &mat->col0, &mat->col1 );
+    det = vmathV3Dot( &mat->col2, &cross01 );
+    return det;
+}
+
+/* Column-by-column vmathV3Add of *mat0 and *mat1, written to *result. */
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    VmathVector3 *out0 = &result->col0;
+    VmathVector3 *out1 = &result->col1;
+    VmathVector3 *out2 = &result->col2;
+    vmathV3Add( out0, &mat0->col0, &mat1->col0 );
+    vmathV3Add( out1, &mat0->col1, &mat1->col1 );
+    vmathV3Add( out2, &mat0->col2, &mat1->col2 );
+}
+
+/* Column-by-column vmathV3Sub of *mat0 and *mat1 (mat0 first), written to *result. */
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    VmathVector3 *out0 = &result->col0;
+    VmathVector3 *out1 = &result->col1;
+    VmathVector3 *out2 = &result->col2;
+    vmathV3Sub( out0, &mat0->col0, &mat1->col0 );
+    vmathV3Sub( out1, &mat0->col1, &mat1->col1 );
+    vmathV3Sub( out2, &mat0->col2, &mat1->col2 );
+}
+
+/* Apply vmathV3Neg to each column of *mat, writing the columns of *result. */
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    VmathVector3 *out0 = &result->col0;
+    VmathVector3 *out1 = &result->col1;
+    VmathVector3 *out2 = &result->col2;
+    vmathV3Neg( out0, &mat->col0 );
+    vmathV3Neg( out1, &mat->col1 );
+    vmathV3Neg( out2, &mat->col2 );
+}
+
+/* Apply vmathV3AbsPerElem to each column of *mat, writing the columns of *result. */
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    VmathVector3 *out0 = &result->col0;
+    VmathVector3 *out1 = &result->col1;
+    VmathVector3 *out2 = &result->col2;
+    vmathV3AbsPerElem( out0, &mat->col0 );
+    vmathV3AbsPerElem( out1, &mat->col1 );
+    vmathV3AbsPerElem( out2, &mat->col2 );
+}
+
+/* Scale each column of *mat by `scalar` via vmathV3ScalarMul, writing *result. */
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
+{
+    VmathVector3 *out0 = &result->col0;
+    VmathVector3 *out1 = &result->col1;
+    VmathVector3 *out2 = &result->col2;
+    vmathV3ScalarMul( out0, &mat->col0, scalar );
+    vmathV3ScalarMul( out1, &mat->col1, scalar );
+    vmathV3ScalarMul( out2, &mat->col2, scalar );
+}
+
+/*
+ * Matrix * vector: *result = col0 * vec.x + col1 * vec.y + col2 * vec.z,
+ * accumulated with three chained multiply-adds.
+ */
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
+{
+    /* broadcast each of the first three lanes of the vector */
+    vec_float4 splatX = vec_splat( vec->vec128, 0 );
+    vec_float4 splatY = vec_splat( vec->vec128, 1 );
+    vec_float4 splatZ = vec_splat( vec->vec128, 2 );
+    vec_float4 accum = vec_madd( mat->col0.vec128, splatX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    accum = vec_madd( mat->col1.vec128, splatY, accum );
+    accum = vec_madd( mat->col2.vec128, splatZ, accum );
+    result->vec128 = accum;
+}
+
+/*
+ * Matrix product *mat0 * *mat1: each column of *mat1 is transformed by *mat0.
+ * The product is built in a temporary and copied out at the end, so *result
+ * may be the same object as either operand.
+ */
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    VmathMatrix3 product;
+    vmathM3MulV3( &product.col0, mat0, &mat1->col0 );
+    vmathM3MulV3( &product.col1, mat0, &mat1->col1 );
+    vmathM3MulV3( &product.col2, mat0, &mat1->col2 );
+    vmathM3Copy( result, &product );
+}
+
+/* Column-by-column vmathV3MulPerElem of *mat0 and *mat1, written to *result. */
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    VmathVector3 *out0 = &result->col0;
+    VmathVector3 *out1 = &result->col1;
+    VmathVector3 *out2 = &result->col2;
+    vmathV3MulPerElem( out0, &mat0->col0, &mat1->col0 );
+    vmathV3MulPerElem( out1, &mat0->col1, &mat1->col1 );
+    vmathV3MulPerElem( out2, &mat0->col2, &mat1->col2 );
+}
+
+/* Fill *result with the X, Y and Z axis vectors as columns 0, 1 and 2. */
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
+{
+    VmathVector3 *xCol = &result->col0;
+    VmathVector3 *yCol = &result->col1;
+    VmathVector3 *zCol = &result->col2;
+    vmathV3MakeXAxis( xCol );
+    vmathV3MakeYAxis( yCol );
+    vmathV3MakeZAxis( zCol );
+}
+
+/*
+ * Build a rotation about the X axis by `radians`.
+ * Column 0 is the X axis; columns 1 and 2 are assembled from cos/sin with
+ * vec_sel (assuming the two masks select the y and z lanes, as their use
+ * here suggests): col1 = ( 0, c, s ), col2 = ( 0, -s, c ).
+ */
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
+{
+    vec_uint4 laneY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 laneZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinV, cosV, yCol, zCol;
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+    yCol = vec_sel( vec_sel( zeros, cosV, laneY ), sinV, laneZ );
+    zCol = vec_sel( vec_sel( zeros, negatef4(sinV), laneY ), cosV, laneZ );
+    vmathV3MakeXAxis( &result->col0 );
+    result->col1.vec128 = yCol;
+    result->col2.vec128 = zCol;
+}
+
+/*
+ * Build a rotation about the Y axis by `radians`.
+ * Column 1 is the Y axis; columns 0 and 2 are assembled from cos/sin with
+ * vec_sel (assuming the two masks select the x and z lanes, as their use
+ * here suggests): col0 = ( c, 0, -s ), col2 = ( s, 0, c ).
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
+{
+    vec_uint4 laneX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 laneZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinV, cosV, xCol, zCol;
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+    xCol = vec_sel( vec_sel( zeros, cosV, laneX ), negatef4(sinV), laneZ );
+    zCol = vec_sel( vec_sel( zeros, sinV, laneX ), cosV, laneZ );
+    result->col0.vec128 = xCol;
+    vmathV3MakeYAxis( &result->col1 );
+    result->col2.vec128 = zCol;
+}
+
+/*
+ * Build a rotation about the Z axis by `radians`.
+ * Column 2 is the Z axis; columns 0 and 1 are assembled from cos/sin with
+ * vec_sel (assuming the two masks select the x and y lanes, as their use
+ * here suggests): col0 = ( c, s, 0 ), col1 = ( -s, c, 0 ).
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
+{
+    vec_uint4 laneX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 laneY = _VECTORMATH_MASK_0x0F00;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinV, cosV, xCol, yCol;
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+    xCol = vec_sel( vec_sel( zeros, cosV, laneX ), sinV, laneY );
+    yCol = vec_sel( vec_sel( zeros, negatef4(sinV), laneX ), cosV, laneY );
+    result->col0.vec128 = xCol;
+    result->col1.vec128 = yCol;
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+/*
+ * Build a rotation matrix from the three angles in *radiansXYZ.
+ *
+ * The angles are widened to a 4-lane vector (fourth lane 0.0f) so that a
+ * single sincosf4 call yields every sine and cosine; the columns are then
+ * assembled from shuffled sin/cos vectors with multiply-adds.
+ * NOTE(review): per the function name the rotations are presumably composed
+ * in Z*Y*X order - confirm against the library documentation.
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
+{
+    VmathVector4 tmpV4_0;
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );
+    angles = tmpV4_0.vec128;
+    /* sines and cosines of all angles in one call */
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    /* Z0/Z1, Y0/Y1, X0/X1: sin/cos terms shuffled into position per axis */
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+    /* clear the bits of Z1 covered by the 0x000F mask */
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    /* lane 0 of s and c broadcast to every lane */
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_madd( Z0, Y1, zero );
+    result->col0.vec128 = vec_madd( Z0, Y0, zero );
+    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );
+    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );
+}
+
+/*
+ * Build a rotation of `radians` about the axis *unitVec.
+ *
+ * Each output column i is computed as
+ *     axis * axis[i] * (1 - cos) + tmp_i
+ * where tmp_i is a shuffle of +/-(axis * sin) with cos inserted by vec_sel.
+ * The parameter name indicates the axis is expected to be unit length; it is
+ * not normalized here.
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    axis = unitVec->vec128;
+    /* sine and cosine of the angle, replicated in every lane */
+    sincosf4( (vec_float4){radians,radians,radians,radians}, &s, &c );
+    /* broadcast the first three lanes of the axis */
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    axisS = vec_madd( axis, s, zero );
+    negAxisS = negatef4( axisS );
+    /* select signed axis*sin terms for each column */
+    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+    /* insert cos into one lane of each column (a different mask per column) */
+    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
+    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
+    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
+    result->col0.vec128 = vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 );
+    result->col1.vec128 = vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 );
+    result->col2.vec128 = vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 );
+}
+
+/* Thin alias: forwards *unitQuat to vmathM3MakeFromQ to fill *result. */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{
+    const VmathQuat *rotation = unitQuat;
+    vmathM3MakeFromQ( result, rotation );
+}
+
+/*
+ * Build a scale matrix from *scaleVec: each column is zero except for the
+ * lane picked out of the scale vector by that column's mask.
+ *
+ * The scale vector is loaded once, before *result is touched, so the call
+ * stays correct when scaleVec points at one of result's own columns
+ * (re-reading it after col0 was written would see the overwritten value).
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
+{
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 scale = scaleVec->vec128;
+    result->col0.vec128 = vec_sel( zero, scale, _VECTORMATH_MASK_0xF000 );
+    result->col1.vec128 = vec_sel( zero, scale, _VECTORMATH_MASK_0x0F00 );
+    result->col2.vec128 = vec_sel( zero, scale, _VECTORMATH_MASK_0x00F0 );
+}
+
+/*
+ * Scale the columns of *mat: column 0 by the X of *scaleVec, column 1 by its
+ * Y and column 2 by its Z, writing the scaled columns to *result.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
+{
+    float factor;
+    factor = vmathV3GetX( scaleVec );
+    vmathV3ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV3ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV3ScalarMul( &result->col2, &mat->col2, factor );
+}
+
+/*
+ * Multiply every column of *mat element-wise by *scaleVec
+ * (vmathV3MulPerElem), writing the columns of *result.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
+{
+    VmathVector3 *out0 = &result->col0;
+    VmathVector3 *out1 = &result->col1;
+    VmathVector3 *out2 = &result->col2;
+    vmathV3MulPerElem( out0, &mat->col0, scaleVec );
+    vmathV3MulPerElem( out1, &mat->col1, scaleVec );
+    vmathV3MulPerElem( out2, &mat->col2, scaleVec );
+}
+
+/*
+ * Column-by-column vmathV3Select between *mat0 and *mat1, passing the same
+ * `select1` flag for every column.
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
+{
+    VmathVector3 *out0 = &result->col0;
+    VmathVector3 *out1 = &result->col1;
+    VmathVector3 *out2 = &result->col2;
+    vmathV3Select( out0, &mat0->col0, &mat1->col0, select1 );
+    vmathV3Select( out1, &mat0->col1, &mat1->col1, select1 );
+    vmathV3Select( out2, &mat0->col2, &mat1->col2, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Debug helper: print rows 0, 1 and 2 of *mat in order with vmathV3Print. */
+static inline void vmathM3Print( const VmathMatrix3 *mat )
+{
+    VmathVector3 rowVec;
+    vmathM3GetRow( &rowVec, mat, 0 );
+    vmathV3Print( &rowVec );
+    vmathM3GetRow( &rowVec, mat, 1 );
+    vmathV3Print( &rowVec );
+    vmathM3GetRow( &rowVec, mat, 2 );
+    vmathV3Print( &rowVec );
+}
+
+/* Debug helper: print `name` followed by a colon and newline, then the matrix. */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
+{
+    const char *label = name;
+    printf("%s:\n", label);
+    vmathM3Print( mat );
+}
+
+#endif
+
+/* Copy all four columns of *mat into *result with vmathV4Copy. */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4Copy( out0, &mat->col0 );
+    vmathV4Copy( out1, &mat->col1 );
+    vmathV4Copy( out2, &mat->col2 );
+    vmathV4Copy( out3, &mat->col3 );
+}
+
+/* Initialize every column of *result from `scalar` via vmathV4MakeFromScalar. */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4MakeFromScalar( out0, scalar );
+    vmathV4MakeFromScalar( out1, scalar );
+    vmathV4MakeFromScalar( out2, scalar );
+    vmathV4MakeFromScalar( out3, scalar );
+}
+
+/*
+ * Widen a 3-D transform to a 4x4 matrix: columns 0..2 are extended with the
+ * scalar 0.0f and column 3 with the scalar 1.0f (vmathV4MakeFromV3Scalar).
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
+{
+    const float axisW = 0.0f;
+    const float originW = 1.0f;
+    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, axisW );
+    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, axisW );
+    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, axisW );
+    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, originW );
+}
+
+/* Assemble *result from four column vectors, copied in order col0..col3. */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4Copy( out0, _col0 );
+    vmathV4Copy( out1, _col1 );
+    vmathV4Copy( out2, _col2 );
+    vmathV4Copy( out3, _col3 );
+}
+
+/*
+ * Build a 4x4 matrix from a 3x3 matrix and a translation: the 3x3 columns
+ * are extended with the scalar 0.0f and *translateVec with the scalar 1.0f.
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
+{
+    const float axisW = 0.0f;
+    const float originW = 1.0f;
+    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, axisW );
+    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, axisW );
+    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, axisW );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, originW );
+}
+
+/*
+ * Build a 4x4 matrix from a quaternion and a translation: the quaternion is
+ * first converted to a 3x3 matrix (vmathM3MakeFromQ), whose columns are
+ * extended with the scalar 0.0f; *translateVec is extended with 1.0f.
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rotation;
+    const float axisW = 0.0f;
+    const float originW = 1.0f;
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathV4MakeFromV3Scalar( &result->col0, &rotation.col0, axisW );
+    vmathV4MakeFromV3Scalar( &result->col1, &rotation.col1, axisW );
+    vmathV4MakeFromV3Scalar( &result->col2, &rotation.col2, axisW );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, originW );
+}
+
+/* Replace column 0 of *result with *_col0 (copied through vmathV4Copy). */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
+{
+    VmathVector4 *dstCol = &result->col0;
+    vmathV4Copy( dstCol, _col0 );
+}
+
+/* Replace column 1 of *result with *_col1 (copied through vmathV4Copy). */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
+{
+    VmathVector4 *dstCol = &result->col1;
+    vmathV4Copy( dstCol, _col1 );
+}
+
+/* Replace column 2 of *result with *_col2 (copied through vmathV4Copy). */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
+{
+    VmathVector4 *dstCol = &result->col2;
+    vmathV4Copy( dstCol, _col2 );
+}
+
+/* Replace column 3 of *result with *_col3 (copied through vmathV4Copy). */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
+{
+    VmathVector4 *dstCol = &result->col3;
+    vmathV4Copy( dstCol, _col3 );
+}
+
+/*
+ * Replace the column of *result selected by `col` with *vec.
+ * The column is addressed as an offset from col0; `col` is not range-checked.
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
+{
+    VmathVector4 *columns = &result->col0;
+    vmathV4Copy( &columns[col], vec );
+}
+
+/*
+ * Store elements 0..3 of *vec into element `row` of columns 0..3 of *result
+ * respectively.
+ *
+ * All four elements are read from *vec before any column of *result is
+ * modified, so the call also behaves correctly when vec points at one of
+ * result's own columns (the interleaved read/write form would pick up
+ * already-overwritten values in that case).
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
+{
+    float elem0 = vmathV4GetElem( vec, 0 );
+    float elem1 = vmathV4GetElem( vec, 1 );
+    float elem2 = vmathV4GetElem( vec, 2 );
+    float elem3 = vmathV4GetElem( vec, 3 );
+    vmathV4SetElem( &result->col0, row, elem0 );
+    vmathV4SetElem( &result->col1, row, elem1 );
+    vmathV4SetElem( &result->col2, row, elem2 );
+    vmathV4SetElem( &result->col3, row, elem3 );
+}
+
+/*
+ * Set the single element at (col, row) of *result to val.
+ * Implemented as read-modify-write of the whole column: fetch the column,
+ * patch one element, store the column back.
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
+{
+    VmathVector4 column;
+    vmathM4GetCol( &column, result, col );
+    vmathV4SetElem( &column, row, val );
+    vmathM4SetCol( result, col, &column );
+}
+
+/* Return the element at (col, row) of *mat by copying out the column first. */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
+{
+    VmathVector4 column;
+    float elem;
+    vmathM4GetCol( &column, mat, col );
+    elem = vmathV4GetElem( &column, row );
+    return elem;
+}
+
+/* Copy column 0 of *mat into *result. */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *srcCol = &mat->col0;
+    vmathV4Copy( result, srcCol );
+}
+
+/* Copy column 1 of *mat into *result. */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *srcCol = &mat->col1;
+    vmathV4Copy( result, srcCol );
+}
+
+/* Copy column 2 of *mat into *result. */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *srcCol = &mat->col2;
+    vmathV4Copy( result, srcCol );
+}
+
+/* Copy column 3 of *mat into *result. */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *srcCol = &mat->col3;
+    vmathV4Copy( result, srcCol );
+}
+
+/*
+ * Copy the column of *mat selected by `col` into *result.
+ * The column is addressed as an offset from col0; `col` is not range-checked.
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
+{
+    const VmathVector4 *columns = &mat->col0;
+    vmathV4Copy( result, &columns[col] );
+}
+
+/* Gather element `row` of each of the four columns of *mat into *result. */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
+{
+    float fromCol0 = vmathV4GetElem( &mat->col0, row );
+    float fromCol1 = vmathV4GetElem( &mat->col1, row );
+    float fromCol2 = vmathV4GetElem( &mat->col2, row );
+    float fromCol3 = vmathV4GetElem( &mat->col3, row );
+    vmathV4MakeFromElems( result, fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+/*
+ * Transpose *mat into *result with two rounds of AltiVec merges.
+ * All four source columns are loaded into locals first and every output
+ * column is computed before *result is written.
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    vec_float4 srcCol0 = mat->col0.vec128;
+    vec_float4 srcCol1 = mat->col1.vec128;
+    vec_float4 srcCol2 = mat->col2.vec128;
+    vec_float4 srcCol3 = mat->col3.vec128;
+    /* first round: interleave columns (0,2) and (1,3) */
+    vec_float4 hi02 = vec_mergeh( srcCol0, srcCol2 );
+    vec_float4 hi13 = vec_mergeh( srcCol1, srcCol3 );
+    vec_float4 lo02 = vec_mergel( srcCol0, srcCol2 );
+    vec_float4 lo13 = vec_mergel( srcCol1, srcCol3 );
+    /* second round: interleave the intermediates into the output columns */
+    vec_float4 outCol0 = vec_mergeh( hi02, hi13 );
+    vec_float4 outCol1 = vec_mergel( hi02, hi13 );
+    vec_float4 outCol2 = vec_mergeh( lo02, lo13 );
+    vec_float4 outCol3 = vec_mergel( lo02, lo13 );
+    result->col0.vec128 = outCol0;
+    result->col1.vec128 = outCol1;
+    result->col2.vec128 = outCol2;
+    result->col3.vec128 = outCol3;
+}
+
+/*
+ * Compute the general inverse of the 4x4 matrix *mat and store it in *result.
+ *
+ * The four columns are shuffled into a "pseudo transpose" (t0..t3), the
+ * cofactor vectors cof0..cof3 are accumulated with vec_madd / vec_nmsub, the
+ * determinant is formed as the sum across all lanes of t0 * cof0, and each
+ * cofactor vector is scaled by recipf4( det ).
+ *
+ * NOTE: the determinant is not tested for zero before recipf4 is applied.
+ * All columns of *mat are loaded into locals before *result is written.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vector float in0, in1, in2, in3;
+    vector float tmp0, tmp1, tmp2, tmp3;
+    vector float cof0, cof1, cof2, cof3;
+    vector float t0, t1, t2, t3;
+    vector float t01, t02, t03, t12, t23;
+    vector float t1r, t2r;
+    vector float t01r, t02r, t03r, t12r, t23r;
+    vector float t1r3, t1r3r;
+    vector float det, det0, det1, det2, det3, invdet;
+    vector float vzero = (vector float){0.0};
+    in0 = mat->col0.vec128;
+    in1 = mat->col1.vec128;
+    in2 = mat->col2.vec128;
+    in3 = mat->col3.vec128;
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
+    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
+    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
+    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
+    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
+    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
+    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
+    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
+    /* Generate a cofactor matrix. The computed cofactors reside in
+     * cof0, cof1, cof2, cof3.
+     */
+    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
+    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
+    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
+    cof1 = vec_nmsub(t0, t23, vzero);		/* -(AGP ECL IOH MKD) */
+    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
+    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
+    cof1 = vec_madd(t0, t23r, cof1);		/* AOH EKD IGP MCL + cof1 */
+    cof1 = vec_sld(cof1, cof1, 8);		/* IGP MCL AOH EKD - IOH MKD AGP ECL */
+    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
+    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
+    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
+    cof3 = vec_madd(t0, t12, vzero);		/* ANG EJC IFO MBK */
+    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
+    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
+    cof3 = vec_nmsub(t0, t12r, cof3);		/* cof3 - AFO EBK ING MJC */
+    cof3 = vec_sld(cof3, cof3, 8);		/* ING MJC AFO EBK - IFO MBK ANG EJC */
+    t1r = vec_sld(t1, t1, 8);			/* B F J N */
+    t2r = vec_sld(t2, t2, 8);			/* K O C G */
+    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
+    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
+    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
+    cof2 = vec_madd(t0, t1r3, vzero);		/* AFP EBL INH MJD */
+    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
+    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
+    cof2 = vec_nmsub(t0, t1r3r, cof2);		/* cof2 - ANH EJD IFP MBL */
+    cof2 = vec_sld(cof2, cof2, 8);		/* IFP MBL ANH EJD - INH MJD AFP EBL */
+    t01 = vec_madd(t0, t1, vzero);		/* AJ EN IB MF */
+    t01 = vec_perm(t01, t01, _VECTORMATH_PERM_YXWZ);	/* EN AJ MF IB */
+    cof2 = vec_nmsub(t3, t01, cof2);		/* cof2 - LEN PAJ DMF HIB */
+    cof3 = vec_madd(t2r, t01, cof3);		/* KEN OAJ CMF GIB + cof3 */ 
+    t01r = vec_sld(t01, t01, 8);			/* MF IB EN AJ */
+    cof2 = vec_madd(t3, t01r, cof2);		/* LMF PIB DEN HAJ + cof2 */
+    cof3 = vec_nmsub(t2r, t01r, cof3);		/* cof3 - KMF OIB CEN GAJ */
+    t03 = vec_madd(t0, t3, vzero);		/* AL EP ID MH */
+    t03 = vec_perm(t03, t03, _VECTORMATH_PERM_YXWZ);	/* EP AL MH ID */
+    cof1 = vec_nmsub(t2r, t03, cof1);		/* cof1 - KEP OAL CMH GID */
+    cof2 = vec_madd(t1, t03, cof2);		/* JEP NAL BMH FID + cof2 */
+    t03r = vec_sld(t03, t03, 8);			/* MH ID EP AL */
+    cof1 = vec_madd(t2r, t03r, cof1);		/* KMH OID CEP GAL + cof1 */
+    cof2 = vec_nmsub(t1, t03r, cof2);		/* cof2 - JMH NID BEP FAL */ 
+    t02 = vec_madd(t0, t2r, vzero);		/* AK EO IC MG */
+    t02 = vec_perm(t02, t02, _VECTORMATH_PERM_YXWZ);	/* EO AK MG IC */
+    cof1 = vec_madd(t3, t02, cof1);		/* LEO PAK DMG HIC + cof1 */
+    cof3 = vec_nmsub(t1, t02, cof3);		/* cof3 - JEO NAK BMG FIC */
+    t02r = vec_sld(t02, t02, 8);			/* MG IC EO AK */
+    cof1 = vec_nmsub(t3, t02r, cof1);		/* cof1 - LMG PIC DEO HAK */
+    cof3 = vec_madd(t1, t02r, cof3);		/* JMG NIC BEO FAK + cof3 */
+    /* Compute the determinant of the matrix 
+     *
+     * det = sum_across(t0 * cof0);
+     *
+     * We perform a sum across the entire vector so that 
+     * we don't have to splat the result when multiplying the
+     * cofactors by the inverse of the determinant.
+     */
+    det  = vec_madd(t0, cof0, vzero);
+    det0 = vec_splat(det, 0);
+    det1 = vec_splat(det, 1);
+    det2 = vec_splat(det, 2);
+    det3 = vec_splat(det, 3);
+    det  = vec_add(det0, det1);
+    det2 = vec_add(det2, det3);
+    det  = vec_add(det, det2);
+    /* Compute the reciprocal of the determinant.
+     */
+    invdet = recipf4(det);
+    /* Multiply the cofactors by the reciprocal of the determinant.
+     */ 
+    result->col0.vec128 = vec_madd(cof0, invdet, vzero);
+    result->col1.vec128 = vec_madd(cof1, invdet, vzero);
+    result->col2.vec128 = vec_madd(cof2, invdet, vzero);
+    result->col3.vec128 = vec_madd(cof3, invdet, vzero);
+}
+
+/*
+ * Invert *mat by treating it as an affine transform: the XYZ part of each of
+ * the four columns is packed into a VmathTransform3, that transform is
+ * inverted with vmathT3Inverse, and the outcome is widened back to 4x4 with
+ * vmathM4MakeFromT3. The fourth element of each input column is not read.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 affine, inverted;
+    VmathVector3 xyz;
+    vmathV4GetXYZ( &xyz, &mat->col0 );
+    vmathT3SetCol0( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col1 );
+    vmathT3SetCol1( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col2 );
+    vmathT3SetCol2( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col3 );
+    vmathT3SetCol3( &affine, &xyz );
+    vmathT3Inverse( &inverted, &affine );
+    vmathM4MakeFromT3( result, &inverted );
+}
+
+/*
+ * Invert *mat by treating it as an affine transform: the XYZ part of each of
+ * the four columns is packed into a VmathTransform3, that transform is
+ * inverted with vmathT3OrthoInverse, and the outcome is widened back to 4x4
+ * with vmathM4MakeFromT3. The fourth element of each input column is not read.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 affine, inverted;
+    VmathVector3 xyz;
+    vmathV4GetXYZ( &xyz, &mat->col0 );
+    vmathT3SetCol0( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col1 );
+    vmathT3SetCol1( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col2 );
+    vmathT3SetCol2( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col3 );
+    vmathT3SetCol3( &affine, &xyz );
+    vmathT3OrthoInverse( &inverted, &affine );
+    vmathM4MakeFromT3( result, &inverted );
+}
+
+/*
+ * Return the determinant of the 4x4 matrix *mat.
+ *
+ * The columns are shuffled into a "pseudo transpose" (t0..t3), a single
+ * cofactor vector cof0 is accumulated with vec_madd / vec_nmsub, and the
+ * result is lane 0 of _vmathVfDot4( t0, cof0 ), extracted through a union.
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vector float in0, in1, in2, in3;
+    vector float tmp0, tmp1, tmp2, tmp3;
+    vector float cof0;
+    vector float t0, t1, t2, t3;
+    vector float t12, t23;
+    vector float t1r, t2r;
+    vector float t12r, t23r;
+    vector float t1r3, t1r3r;
+    vector float vzero = (vector float){0.0};
+    /* union used to read a single float lane out of a vector result */
+    union { vec_float4 v; float s[4]; } tmp;
+    in0 = mat->col0.vec128;
+    in1 = mat->col1.vec128;
+    in2 = mat->col2.vec128;
+    in3 = mat->col3.vec128;
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
+    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
+    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
+    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
+    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
+    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
+    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
+    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
+    /* Generate the cofactor vector cof0. It is the only cofactor vector
+     * computed in this function; the determinant is its dot product with t0.
+     */
+    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
+    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
+    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
+    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
+    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
+    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
+    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
+    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
+    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
+    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
+    t1r = vec_sld(t1, t1, 8);			/* B F J N */
+    t2r = vec_sld(t2, t2, 8);			/* K O C G */
+    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
+    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
+    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
+    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
+    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
+    /* determinant = t0 . cof0; only lane 0 of the vector result is returned */
+    tmp.v = _vmathVfDot4(t0,cof0);
+    return tmp.s[0];
+}
+
+/* Column-by-column vmathV4Add of *mat0 and *mat1, written to *result. */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4Add( out0, &mat0->col0, &mat1->col0 );
+    vmathV4Add( out1, &mat0->col1, &mat1->col1 );
+    vmathV4Add( out2, &mat0->col2, &mat1->col2 );
+    vmathV4Add( out3, &mat0->col3, &mat1->col3 );
+}
+
+/* Column-by-column vmathV4Sub of *mat0 and *mat1 (mat0 first), written to *result. */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4Sub( out0, &mat0->col0, &mat1->col0 );
+    vmathV4Sub( out1, &mat0->col1, &mat1->col1 );
+    vmathV4Sub( out2, &mat0->col2, &mat1->col2 );
+    vmathV4Sub( out3, &mat0->col3, &mat1->col3 );
+}
+
+/* Apply vmathV4Neg to each column of *mat, writing the columns of *result. */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4Neg( out0, &mat->col0 );
+    vmathV4Neg( out1, &mat->col1 );
+    vmathV4Neg( out2, &mat->col2 );
+    vmathV4Neg( out3, &mat->col3 );
+}
+
+/* Apply vmathV4AbsPerElem to each column of *mat, writing the columns of *result. */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4AbsPerElem( out0, &mat->col0 );
+    vmathV4AbsPerElem( out1, &mat->col1 );
+    vmathV4AbsPerElem( out2, &mat->col2 );
+    vmathV4AbsPerElem( out3, &mat->col3 );
+}
+
+/* Scale each column of *mat by `scalar` via vmathV4ScalarMul, writing *result. */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4ScalarMul( out0, &mat->col0, scalar );
+    vmathV4ScalarMul( out1, &mat->col1, scalar );
+    vmathV4ScalarMul( out2, &mat->col2, scalar );
+    vmathV4ScalarMul( out3, &mat->col3, scalar );
+}
+
+/*
+ * Matrix * 4-D vector:
+ *   *result = (col0 * x + col2 * z) + (col1 * y + col3 * w)
+ * evaluated as two independent multiply-add chains that are summed at the end.
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* broadcast each lane of the vector */
+    vec_float4 splatX = vec_splat( vec->vec128, 0 );
+    vec_float4 splatY = vec_splat( vec->vec128, 1 );
+    vec_float4 splatZ = vec_splat( vec->vec128, 2 );
+    vec_float4 splatW = vec_splat( vec->vec128, 3 );
+    vec_float4 chainXZ = vec_madd( mat->col0.vec128, splatX, zeros );
+    vec_float4 chainYW = vec_madd( mat->col1.vec128, splatY, zeros );
+    chainXZ = vec_madd( mat->col2.vec128, splatZ, chainXZ );
+    chainYW = vec_madd( mat->col3.vec128, splatW, chainYW );
+    result->vec128 = vec_add( chainXZ, chainYW );
+}
+
+/*
+ * 4x4 matrix * 3-D vector: *result = col0 * x + col1 * y + col2 * z.
+ * Column 3 of *mat does not take part in the product.
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
+{
+    /* broadcast each of the first three lanes of the vector */
+    vec_float4 splatX = vec_splat( vec->vec128, 0 );
+    vec_float4 splatY = vec_splat( vec->vec128, 1 );
+    vec_float4 splatZ = vec_splat( vec->vec128, 2 );
+    vec_float4 accum = vec_madd( mat->col0.vec128, splatX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    accum = vec_madd( mat->col1.vec128, splatY, accum );
+    accum = vec_madd( mat->col2.vec128, splatZ, accum );
+    result->vec128 = accum;
+}
+
+/*
+ * 4x4 matrix * 3-D point:
+ *   *result = (col0 * x + col2 * z) + (col1 * y + col3)
+ * Column 3 is added unscaled, i.e. the point is treated as having w = 1.
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* broadcast each of the first three lanes of the point */
+    vec_float4 splatX = vec_splat( pnt->vec128, 0 );
+    vec_float4 splatY = vec_splat( pnt->vec128, 1 );
+    vec_float4 splatZ = vec_splat( pnt->vec128, 2 );
+    vec_float4 chainXZ = vec_madd( mat->col0.vec128, splatX, zeros );
+    vec_float4 chainYT = vec_madd( mat->col1.vec128, splatY, zeros );
+    chainXZ = vec_madd( mat->col2.vec128, splatZ, chainXZ );
+    chainYT = vec_add( mat->col3.vec128, chainYT );
+    result->vec128 = vec_add( chainXZ, chainYT );
+}
+
+/*
+ * Matrix product *mat0 * *mat1: each column of *mat1 is transformed by *mat0.
+ * The product is built in a temporary and copied out at the end, so *result
+ * may be the same object as either operand.
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    VmathMatrix4 product;
+    vmathM4MulV4( &product.col0, mat0, &mat1->col0 );
+    vmathM4MulV4( &product.col1, mat0, &mat1->col1 );
+    vmathM4MulV4( &product.col2, mat0, &mat1->col2 );
+    vmathM4MulV4( &product.col3, mat0, &mat1->col3 );
+    vmathM4Copy( result, &product );
+}
+
+/*
+ * Product of a 4x4 matrix and a 3-D transform. Columns 0..2 of *tfrm1 are
+ * multiplied as vectors (vmathM4MulV3); column 3 is converted to a point and
+ * multiplied with vmathM4MulP3. The product is built in a temporary and
+ * copied out at the end, so *result may be the same object as *mat.
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
+{
+    VmathMatrix4 product;
+    VmathPoint3 origin;
+    vmathM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathP3MakeFromV3( &origin, &tfrm1->col3 );
+    vmathM4MulP3( &product.col3, mat, &origin );
+    vmathM4Copy( result, &product );
+}
+
+/* Column-by-column vmathV4MulPerElem of *mat0 and *mat1, written to *result. */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    VmathVector4 *out0 = &result->col0;
+    VmathVector4 *out1 = &result->col1;
+    VmathVector4 *out2 = &result->col2;
+    VmathVector4 *out3 = &result->col3;
+    vmathV4MulPerElem( out0, &mat0->col0, &mat1->col0 );
+    vmathV4MulPerElem( out1, &mat0->col1, &mat1->col1 );
+    vmathV4MulPerElem( out2, &mat0->col2, &mat1->col2 );
+    vmathV4MulPerElem( out3, &mat0->col3, &mat1->col3 );
+}
+
+/* Fill *result with the X, Y, Z and W axis vectors as columns 0..3. */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
+{
+    VmathVector4 *xCol = &result->col0;
+    VmathVector4 *yCol = &result->col1;
+    VmathVector4 *zCol = &result->col2;
+    VmathVector4 *wCol = &result->col3;
+    vmathV4MakeXAxis( xCol );
+    vmathV4MakeYAxis( yCol );
+    vmathV4MakeZAxis( zCol );
+    vmathV4MakeWAxis( wCol );
+}
+
+/* Writes the columns of a 3x3 matrix into the xyz part of the first three
+ * columns of *result via vmathV4SetXYZ; col3 is not touched here. */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
+{
+    VmathMatrix4 *const dst = result;
+    const VmathMatrix3 *const src = mat3;
+    vmathV4SetXYZ( &dst->col0, &src->col0 );
+    vmathV4SetXYZ( &dst->col1, &src->col1 );
+    vmathV4SetXYZ( &dst->col2, &src->col2 );
+}
+
+/* Extracts the xyz part of the first three columns of a 4x4 matrix into a
+ * 3x3 matrix. */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
+{
+    VmathMatrix3 *const dst = result;
+    const VmathMatrix4 *const src = mat;
+    vmathV4GetXYZ( &dst->col0, &src->col0 );
+    vmathV4GetXYZ( &dst->col1, &src->col1 );
+    vmathV4GetXYZ( &dst->col2, &src->col2 );
+}
+
+/* Stores translateVec into the xyz part of the fourth column of *result. */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    VmathVector4 *const translationCol = &result->col3;
+    vmathV4SetXYZ( translationCol, translateVec );
+}
+
+/* Reads the xyz part of the fourth column of mat into *result. */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *const translationCol = &mat->col3;
+    vmathV4GetXYZ( result, translationCol );
+}
+
+/* Builds a 4x4 rotation about the x axis by the given angle in radians.
+ * col0 = x axis, col1 = (0, cos, sin, 0), col2 = (0, -sin, cos, 0),
+ * col3 = w axis. */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
+{
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    /* sine and cosine of the angle, replicated in every lane */
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    vmathV4MakeXAxis( &result->col0 );
+    result->col1.vec128 = vec_sel( vec_sel( kZero, cosA, maskY ), sinA, maskZ );
+    result->col2.vec128 = vec_sel( vec_sel( kZero, negatef4(sinA), maskY ), cosA, maskZ );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/* Builds a 4x4 rotation about the y axis by the given angle in radians.
+ * col0 = (cos, 0, -sin, 0), col1 = y axis, col2 = (sin, 0, cos, 0),
+ * col3 = w axis. */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
+{
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    /* sine and cosine of the angle, replicated in every lane */
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    result->col0.vec128 = vec_sel( vec_sel( kZero, cosA, maskX ), negatef4(sinA), maskZ );
+    vmathV4MakeYAxis( &result->col1 );
+    result->col2.vec128 = vec_sel( vec_sel( kZero, sinA, maskX ), cosA, maskZ );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/* Builds a 4x4 rotation about the z axis by the given angle in radians.
+ * col0 = (cos, sin, 0, 0), col1 = (-sin, cos, 0, 0), col2 = z axis,
+ * col3 = w axis. */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
+{
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    /* sine and cosine of the angle, replicated in every lane */
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    result->col0.vec128 = vec_sel( vec_sel( kZero, cosA, maskX ), sinA, maskY );
+    result->col1.vec128 = vec_sel( vec_sel( kZero, negatef4(sinA), maskX ), cosA, maskY );
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
+{   /* Builds a 4x4 rotation matrix from three angles (radians) stored in
+     * the x, y and z lanes of radiansXYZ. Sines and cosines of all angles
+     * are computed in one SIMD call and then shuffled into per-axis
+     * factor vectors (X*, Y*, Z*) whose products form the three rotation
+     * columns. col3 is set to the w axis.
+     * NOTE(review): the lane layout produced by _VECTORMATH_PERM_BBYX is
+     * defined outside this chunk - confirm against its definition. */
+    VmathVector4 tmpV4_0;
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f ); /* fourth angle is forced to 0 */
+    angles = tmpV4_0.vec128;
+    sincosf4( angles, &s, &c ); /* per-lane sine and cosine of every angle */
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s ); /* (c.z, s.z, c.w, s.w) */
+    Z1 = vec_mergel( negS, c ); /* (-s.z, c.z, -s.w, c.w) */
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F ); /* clear the bits selected by the 0x000F mask */
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    X0 = vec_splat( s, 0 ); /* sin of the x angle in every lane */
+    X1 = vec_splat( c, 0 ); /* cos of the x angle in every lane */
+    tmp = vec_madd( Z0, Y1, zero ); /* shared by col1 and col2 */
+    result->col0.vec128 = vec_madd( Z0, Y0, zero );
+    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ); /* Z1*X1 + tmp*X0 */
+    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ); /* tmp*X1 - Z1*X0 */
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
+{   /* Builds a 4x4 rotation of `radians` about the axis unitVec.
+     * Each of the first three columns is
+     *     axis * axis[i] * (1 - cos) + tmp_i
+     * where tmp_i is a shuffle of +/-(axis * sin) with cos inserted in
+     * lane i. col3 is set to the w axis.
+     * The parameter name suggests unitVec must be unit length; this
+     * function does not normalize it.
+     * NOTE(review): the lane layouts of the _VECTORMATH_PERM_* constants
+     * are defined outside this chunk - confirm against their definitions. */
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    axis = unitVec->vec128;
+    sincosf4( (vec_float4){radians,radians,radians,radians}, &s, &c ); /* sin/cos replicated in all lanes */
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    axisS = vec_madd( axis, s, zero ); /* axis * sin */
+    negAxisS = negatef4( axisS );
+    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 ); /* cos goes into the lane selected by each mask */
+    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
+    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
+    zeroW = (vec_float4)_VECTORMATH_MASK_0x000F;
+    axis = vec_andc( axis, zeroW ); /* and-with-complement clears the masked lane of every operand */
+    tmp0 = vec_andc( tmp0, zeroW );
+    tmp1 = vec_andc( tmp1, zeroW );
+    tmp2 = vec_andc( tmp2, zeroW );
+    result->col0.vec128 = vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 );
+    result->col1.vec128 = vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 );
+    result->col2.vec128 = vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/* Builds a 4x4 matrix from a quaternion by first making a 3x4 transform
+ * with vmathT3MakeRotationQ and then converting it with vmathM4MakeFromT3. */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
+{
+    VmathTransform3 rotation;
+    vmathT3MakeRotationQ( &rotation, unitQuat );
+    vmathM4MakeFromT3( result, &rotation );
+}
+
+/* Builds a 4x4 scale matrix: column i keeps only lane i of scaleVec, all
+ * other lanes are zero; col3 is the w axis. */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
+{
+    vec_uint4 laneX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 laneY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 laneZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    result->col0.vec128 = vec_sel( kZero, scaleVec->vec128, laneX );
+    result->col1.vec128 = vec_sel( kZero, scaleVec->vec128, laneY );
+    result->col2.vec128 = vec_sel( kZero, scaleVec->vec128, laneZ );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/* Scales the first three columns of mat by the x, y and z components of
+ * scaleVec respectively and copies the fourth column unchanged. */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
+{
+    float factor;
+    factor = vmathV3GetX( scaleVec );
+    vmathV4ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV4ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV4ScalarMul( &result->col2, &mat->col2, factor );
+    vmathV4Copy( &result->col3, &mat->col3 );
+}
+
+/* Multiplies every column of mat, element by element, with
+ * (scale.x, scale.y, scale.z, 1), so each row is scaled and the last row is
+ * left as is. */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
+{
+    VmathVector4 rowScale;
+    /* widen the 3-D scale with w = 1 */
+    vmathV4MakeFromV3Scalar( &rowScale, scaleVec, 1.0f );
+    vmathV4MulPerElem( &result->col0, &mat->col0, &rowScale );
+    vmathV4MulPerElem( &result->col1, &mat->col1, &rowScale );
+    vmathV4MulPerElem( &result->col2, &mat->col2, &rowScale );
+    vmathV4MulPerElem( &result->col3, &mat->col3, &rowScale );
+}
+
+/* Builds a 4x4 translation matrix: unit axes in the first three columns and
+ * (translateVec, 1) in the fourth. */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    VmathMatrix4 *const dst = result;
+    vmathV4MakeXAxis( &dst->col0 );
+    vmathV4MakeYAxis( &dst->col1 );
+    vmathV4MakeZAxis( &dst->col2 );
+    /* translation widened with w = 1 */
+    vmathV4MakeFromV3Scalar( &dst->col3, translateVec, 1.0f );
+}
+
+/* Builds a viewing matrix for an eye at eyePos looking toward lookAtPos with
+ * the given up direction. An eye frame (x, y, z axes plus eye position) is
+ * assembled and then inverted with vmathM4OrthoInverse. */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
+{
+    VmathVector3 upDir, towardEye, zAxis, xRaw, xAxis, yAxis;
+    VmathVector4 c0, c1, c2, c3;
+    VmathMatrix4 eyeFrame;
+    /* z axis: normalized (eye - target) */
+    vmathP3Sub( &towardEye, eyePos, lookAtPos );
+    vmathV3Normalize( &zAxis, &towardEye );
+    /* x axis: normalized (up cross z) */
+    vmathV3Normalize( &upDir, upVec );
+    vmathV3Cross( &xRaw, &upDir, &zAxis );
+    vmathV3Normalize( &xAxis, &xRaw );
+    /* y axis: z cross x, replacing the caller's up direction */
+    vmathV3Cross( &yAxis, &zAxis, &xAxis );
+    vmathV4MakeFromV3( &c0, &xAxis );
+    vmathV4MakeFromV3( &c1, &yAxis );
+    vmathV4MakeFromV3( &c2, &zAxis );
+    vmathV4MakeFromP3( &c3, eyePos );
+    vmathM4MakeFromCols( &eyeFrame, &c0, &c1, &c2, &c3 );
+    vmathM4OrthoInverse( result, &eyeFrame );
+}
+
+/* Builds a perspective projection matrix from a vertical field of view
+ * (radians), an aspect ratio and near/far distances.
+ *   col0 = (f/aspect, 0, 0, 0)
+ *   col1 = (0, f, 0, 0)
+ *   col2 = (0, 0, (near+far)/(near-far), -1)
+ *   col3 = (0, 0, 2*near*far/(near-far), 0)
+ * with f = tanf(_VECTORMATH_PI_OVER_2 - fovy/2). No check is made for
+ * zNear == zFar or aspect == 0. */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
+{
+    float focal, invRange;
+    focal = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    invRange = 1.0f / ( zNear - zFar );
+    result->col0.vec128 = ((vec_float4){ focal / aspect, 0.0f, 0.0f, 0.0f });
+    result->col1.vec128 = ((vec_float4){ 0.0f, focal, 0.0f, 0.0f });
+    result->col2.vec128 = ((vec_float4){ 0.0f, 0.0f, ( zNear + zFar ) * invRange, -1.0f });
+    result->col3.vec128 = ((vec_float4){ 0.0f, 0.0f, zNear * zFar * invRange * 2.0f, 0.0f });
+}
+
+/* Builds a perspective projection matrix from the six planes of a viewing
+ * frustum.
+ *   col0 = (2n/(r-l), 0, 0, 0)
+ *   col1 = (0, 2n/(t-b), 0, 0)
+ *   col2 = ((r+l)/(r-l), (t+b)/(t-b), (n+f)/(n-f), -1)
+ *   col3 = (0, 0, 2nf/(n-f), 0)
+ * No check is made for degenerate input (left == right, bottom == top or
+ * zNear == zFar). */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff;
+    vec_float4 diagonal, column, near2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
+    /* Zero all lanes before storing lane 0, the same pattern used by     */
+    /* vmathM4MakePerspective. The merges below read whole vectors, so    */
+    /* without this the unused lanes would hold indeterminate values.     */
+    /* Those lanes never reach the output; this only makes the            */
+    /* intermediate arithmetic well defined.                              */
+    l.v = zero;
+    f.v = zero;
+    r.v = zero;
+    n.v = zero;
+    b.v = zero;
+    t.v = zero;
+    l.s[0] = left;
+    f.s[0] = zFar;
+    r.s[0] = right;
+    n.s[0] = zNear;
+    b.s[0] = bottom;
+    t.s[0] = top;
+    /* pack (left, bottom, far, -) and (right, top, near, -) */
+    lbf = vec_mergeh( l.v, f.v );
+    rtn = vec_mergeh( r.v, n.v );
+    lbf = vec_mergeh( lbf, b.v );
+    rtn = vec_mergeh( rtn, t.v );
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    /* 2 * near in every lane */
+    near2 = vec_splat( n.v, 0 );
+    near2 = vec_add( near2, near2 );
+    diagonal = vec_madd( near2, inv_diff, zero );
+    column = vec_madd( sum, inv_diff, zero );
+    result->col0.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 );
+    result->col1.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 );
+    result->col2.vec128 = vec_sel( column, ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}), _VECTORMATH_MASK_0x000F );
+    result->col3.vec128 = vec_sel( zero, vec_madd( diagonal, vec_splat( f.v, 0 ), zero ), _VECTORMATH_MASK_0x00F0 );
+}
+
+/* Builds an orthographic projection matrix from six clipping planes.
+ *   col0 = (2/(r-l), 0, 0, 0)
+ *   col1 = (0, 2/(t-b), 0, 0)
+ *   col2 = (0, 0, 2/(n-f), 0)
+ *   col3 = (-(r+l)/(r-l), -(t+b)/(t-b), (n+f)/(n-f), 1)
+ * No check is made for degenerate input (left == right, bottom == top or
+ * zNear == zFar). */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff, neg_inv_diff;
+    vec_float4 diagonal, column;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
+    /* Zero all lanes before storing lane 0, the same pattern used by     */
+    /* vmathM4MakePerspective. The merges below read whole vectors, so    */
+    /* without this the unused lanes would hold indeterminate values.     */
+    /* Those lanes never reach the output; this only makes the            */
+    /* intermediate arithmetic well defined.                              */
+    l.v = zero;
+    f.v = zero;
+    r.v = zero;
+    n.v = zero;
+    b.v = zero;
+    t.v = zero;
+    l.s[0] = left;
+    f.s[0] = zFar;
+    r.s[0] = right;
+    n.s[0] = zNear;
+    b.s[0] = bottom;
+    t.s[0] = top;
+    /* pack (left, bottom, far, -) and (right, top, near, -) */
+    lbf = vec_mergeh( l.v, f.v );
+    rtn = vec_mergeh( r.v, n.v );
+    lbf = vec_mergeh( lbf, b.v );
+    rtn = vec_mergeh( rtn, t.v );
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    diagonal = vec_add( inv_diff, inv_diff );
+    /* the 0x00F0 lane keeps the positive reciprocal, the others are negated */
+    column = vec_madd( sum, vec_sel( neg_inv_diff, inv_diff, _VECTORMATH_MASK_0x00F0 ), zero );
+    result->col0.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 );
+    result->col1.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 );
+    result->col2.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x00F0 );
+    result->col3.vec128 = vec_sel( column, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), _VECTORMATH_MASK_0x000F );
+}
+
+/* Column-wise select between two 4x4 matrices: every column is produced by
+ * vmathV4Select with the same select1 flag. */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
+{
+    const unsigned int pick = select1;
+    VmathMatrix4 *const dst = result;
+    vmathV4Select( &dst->col0, &mat0->col0, &mat1->col0, pick );
+    vmathV4Select( &dst->col1, &mat0->col1, &mat1->col1, pick );
+    vmathV4Select( &dst->col2, &mat0->col2, &mat1->col2, pick );
+    vmathV4Select( &dst->col3, &mat0->col3, &mat1->col3, pick );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Debug helper: prints the four rows of a 4x4 matrix, one vmathV4Print call
+ * per row, in row order 0..3. */
+static inline void vmathM4Print( const VmathMatrix4 *mat )
+{
+    VmathVector4 row;
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathM4GetRow( &row, mat, i );
+        vmathV4Print( &row );
+    }
+}
+
+/* Debug helper: prints "<name>:" on its own line, then the matrix rows. */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
+{
+    const char *const label = name;
+    printf("%s:\n", label);
+    vmathM4Print( mat );
+}
+
+#endif
+
+/* Copies all four columns of a 3x4 transform. */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    VmathTransform3 *const dst = result;
+    const VmathTransform3 *const src = tfrm;
+    vmathV3Copy( &dst->col0, &src->col0 );
+    vmathV3Copy( &dst->col1, &src->col1 );
+    vmathV3Copy( &dst->col2, &src->col2 );
+    vmathV3Copy( &dst->col3, &src->col3 );
+}
+
+/* Fills every column of the transform from the same scalar via
+ * vmathV3MakeFromScalar. */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
+{
+    const float value = scalar;
+    /* the four stores are independent of each other */
+    vmathV3MakeFromScalar( &result->col3, value );
+    vmathV3MakeFromScalar( &result->col2, value );
+    vmathV3MakeFromScalar( &result->col1, value );
+    vmathV3MakeFromScalar( &result->col0, value );
+}
+
+/* Assembles a 3x4 transform from four column vectors, copied in order
+ * col0..col3. */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
+{
+    VmathTransform3 *const dst = result;
+    vmathV3Copy( &dst->col0, _col0 );
+    vmathV3Copy( &dst->col1, _col1 );
+    vmathV3Copy( &dst->col2, _col2 );
+    vmathV3Copy( &dst->col3, _col3 );
+}
+
+/* Builds a 3x4 transform from a 3x3 matrix (first three columns) and a
+ * translation vector (fourth column). */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
+{
+    VmathTransform3 *const dst = result;
+    vmathT3SetUpper3x3( dst, tfrm );
+    vmathT3SetTranslation( dst, translateVec );
+}
+
+/* Builds a 3x4 transform from a quaternion and a translation: the
+ * quaternion is converted to a 3x3 matrix with vmathM3MakeFromQ, which
+ * becomes the first three columns; translateVec becomes the fourth. */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rotation;
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathT3SetUpper3x3( result, &rotation );
+    vmathT3SetTranslation( result, translateVec );
+}
+
+/* Overwrites column 0 of the transform with *_col0. */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
+{
+    VmathVector3 *const dstCol = &result->col0;
+    vmathV3Copy( dstCol, _col0 );
+}
+
+/* Overwrites column 1 of the transform with *_col1. */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
+{
+    VmathVector3 *const dstCol = &result->col1;
+    vmathV3Copy( dstCol, _col1 );
+}
+
+/* Overwrites column 2 of the transform with *_col2. */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
+{
+    VmathVector3 *const dstCol = &result->col2;
+    vmathV3Copy( dstCol, _col2 );
+}
+
+/* Overwrites column 3 (the translation column) of the transform with
+ * *_col3. */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
+{
+    VmathVector3 *const dstCol = &result->col3;
+    vmathV3Copy( dstCol, _col3 );
+}
+
+/* Overwrites the column with index col. The column is addressed by pointer
+ * offset from col0, so this relies on col0..col3 being laid out
+ * contiguously; col is not range-checked. */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
+{
+    VmathVector3 *const firstCol = &result->col0;
+    vmathV3Copy( &firstCol[col], vec );
+}
+
+/* Overwrites one row of the 3x4 transform: element i of vec is stored at
+ * index row of column i, for i = 0..3. */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
+{
+    float elem;
+    elem = vmathV4GetElem( vec, 0 );
+    vmathV3SetElem( &result->col0, row, elem );
+    elem = vmathV4GetElem( vec, 1 );
+    vmathV3SetElem( &result->col1, row, elem );
+    elem = vmathV4GetElem( vec, 2 );
+    vmathV3SetElem( &result->col2, row, elem );
+    elem = vmathV4GetElem( vec, 3 );
+    vmathV3SetElem( &result->col3, row, elem );
+}
+
+/* Sets a single element by read-modify-write of its column: fetch the
+ * column, change element `row`, store the column back. */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
+{
+    VmathVector3 column;
+    vmathT3GetCol( &column, result, col );
+    vmathV3SetElem( &column, row, val );
+    vmathT3SetCol( result, col, &column );
+}
+
+/* Returns the element at (col, row) by copying out the column and reading
+ * element `row` from it. */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
+{
+    VmathVector3 column;
+    float elem;
+    vmathT3GetCol( &column, tfrm, col );
+    elem = vmathV3GetElem( &column, row );
+    return elem;
+}
+
+/* Copies column 0 of the transform into *result. */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *const srcCol = &tfrm->col0;
+    vmathV3Copy( result, srcCol );
+}
+
+/* Copies column 1 of the transform into *result. */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *const srcCol = &tfrm->col1;
+    vmathV3Copy( result, srcCol );
+}
+
+/* Copies column 2 of the transform into *result. */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *const srcCol = &tfrm->col2;
+    vmathV3Copy( result, srcCol );
+}
+
+/* Copies column 3 (the translation column) of the transform into *result. */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *const srcCol = &tfrm->col3;
+    vmathV3Copy( result, srcCol );
+}
+
+/* Copies the column with index col into *result. The column is addressed by
+ * pointer offset from col0, so this relies on col0..col3 being laid out
+ * contiguously; col is not range-checked. */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
+{
+    const VmathVector3 *const firstCol = &tfrm->col0;
+    vmathV3Copy( result, &firstCol[col] );
+}
+
+/* Gathers one row of the 3x4 transform into a 4-D vector: element `row` of
+ * each of the four columns, in column order. */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
+{
+    float e0, e1, e2, e3;
+    e0 = vmathV3GetElem( &tfrm->col0, row );
+    e1 = vmathV3GetElem( &tfrm->col1, row );
+    e2 = vmathV3GetElem( &tfrm->col2, row );
+    e3 = vmathV3GetElem( &tfrm->col3, row );
+    vmathV4MakeFromElems( result, e0, e1, e2, e3 );
+}
+
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{   /* General inverse of a 3x4 transform.
+     * The cross products of the basis columns are transposed into
+     * inv0..inv2 and scaled by 1/determinant; the translation becomes the
+     * negated translation multiplied through those same columns.
+     * There is no check for a zero determinant.
+     * NOTE(review): the lane layouts of _VECTORMATH_PERM_ZBWX and
+     * _VECTORMATH_PERM_XCYX are defined outside this chunk - confirm. */
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    tmp2 = _vmathVfCross( tfrm->col0.vec128, tfrm->col1.vec128 ); /* col0 x col1 */
+    tmp0 = _vmathVfCross( tfrm->col1.vec128, tfrm->col2.vec128 ); /* col1 x col2 */
+    tmp1 = _vmathVfCross( tfrm->col2.vec128, tfrm->col0.vec128 ); /* col2 x col0 */
+    inv3 = negatef4( tfrm->col3.vec128 ); /* negated translation */
+    dot = _vmathVfDot3( tmp2, tfrm->col2.vec128 ); /* (col0 x col1) . col2 = determinant of the 3x3 part */
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );
+    tmp3 = vec_mergeh( tmp0, tmp2 ); /* (tmp0.x, tmp2.x, tmp0.y, tmp2.y) */
+    tmp4 = vec_mergel( tmp0, tmp2 ); /* (tmp0.z, tmp2.z, tmp0.w, tmp2.w) */
+    inv0 = vec_mergeh( tmp3, tmp1 ); /* (tmp0.x, tmp1.x, tmp2.x, tmp1.y) */
+    xxxx = vec_splat( inv3, 0 );
+    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    inv3 = vec_madd( inv0, xxxx, zero ); /* inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z) */
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    inv0 = vec_madd( inv0, invdet, zero ); /* scale every column by 1/determinant */
+    inv1 = vec_madd( inv1, invdet, zero );
+    inv2 = vec_madd( inv2, invdet, zero );
+    inv3 = vec_madd( inv3, invdet, zero );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+    result->col3.vec128 = inv3;
+}
+
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{   /* Inverse of a 3x4 transform computed without a determinant: the
+     * basis columns are rearranged with merges/permutes into inv0..inv2
+     * and the new translation is the negated translation multiplied
+     * through those columns. No division is performed, so the result is
+     * only a true inverse when the 3x3 part is orthonormal (as the
+     * function name indicates).
+     * NOTE(review): the lane layouts of _VECTORMATH_PERM_ZBWX and
+     * _VECTORMATH_PERM_XCYX are defined outside this chunk - confirm. */
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1;
+    vec_float4 xxxx, yyyy, zzzz;
+    tmp0 = vec_mergeh( tfrm->col0.vec128, tfrm->col2.vec128 ); /* (c0.x, c2.x, c0.y, c2.y) */
+    tmp1 = vec_mergel( tfrm->col0.vec128, tfrm->col2.vec128 ); /* (c0.z, c2.z, c0.w, c2.w) */
+    inv3 = negatef4( tfrm->col3.vec128 ); /* negated translation */
+    inv0 = vec_mergeh( tmp0, tfrm->col1.vec128 ); /* (c0.x, c1.x, c2.x, c1.y) */
+    xxxx = vec_splat( inv3, 0 );
+    inv1 = vec_perm( tmp0, tfrm->col1.vec128, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp1, tfrm->col1.vec128, _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    inv3 = vec_madd( inv0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z) */
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+    result->col3.vec128 = inv3;
+}
+
+/* Applies vmathV3AbsPerElem to each of the four columns of the transform. */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    VmathTransform3 *const dst = result;
+    const VmathTransform3 *const src = tfrm;
+    vmathV3AbsPerElem( &dst->col0, &src->col0 );
+    vmathV3AbsPerElem( &dst->col1, &src->col1 );
+    vmathV3AbsPerElem( &dst->col2, &src->col2 );
+    vmathV3AbsPerElem( &dst->col3, &src->col3 );
+}
+
+/* Transforms a 3-D vector by the first three columns of the transform:
+ * col0*x + col1*y + col2*z. The translation column is not used. */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
+{
+    vec_float4 vx, vy, vz, acc;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* broadcast each component across all four lanes */
+    vx = vec_splat( vec->vec128, 0 );
+    vy = vec_splat( vec->vec128, 1 );
+    vz = vec_splat( vec->vec128, 2 );
+    /* single multiply-add chain, innermost term first */
+    acc = vec_madd( tfrm->col2.vec128, vz,
+              vec_madd( tfrm->col1.vec128, vy,
+                  vec_madd( tfrm->col0.vec128, vx, kZero ) ) );
+    result->vec128 = acc;
+}
+
+/* Transforms a 3-D point: col0*x + col1*y + col2*z + col3, i.e. the
+ * translation column is added with weight one. */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
+{
+    vec_float4 px, py, pz;
+    vec_float4 sumXZ, sumYT;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* broadcast each point coordinate across all four lanes */
+    px = vec_splat( pnt->vec128, 0 );
+    py = vec_splat( pnt->vec128, 1 );
+    pz = vec_splat( pnt->vec128, 2 );
+    /* two independent accumulation chains, combined at the end */
+    sumXZ = vec_madd( tfrm->col2.vec128, pz, vec_madd( tfrm->col0.vec128, px, kZero ) );
+    sumYT = vec_add( tfrm->col3.vec128, vec_madd( tfrm->col1.vec128, py, kZero ) );
+    result->vec128 = vec_add( sumXZ, sumYT );
+}
+
+/* Composes two 3x4 transforms (tfrm0 * tfrm1). The basis columns of tfrm1
+ * go through vmathT3MulV3; its translation is treated as a point and goes
+ * through vmathT3MulP3. The product is built in a local and copied out at
+ * the end, presumably so that result may be the same object as an operand. */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{
+    VmathPoint3 origin, movedOrigin;
+    VmathTransform3 product;
+    /* translation column first: it only touches locals, so order is free */
+    vmathP3MakeFromV3( &origin, &tfrm1->col3 );
+    vmathT3MulP3( &movedOrigin, tfrm0, &origin );
+    vmathV3MakeFromP3( &product.col3, &movedOrigin );
+    /* basis columns */
+    vmathT3MulV3( &product.col0, tfrm0, &tfrm1->col0 );
+    vmathT3MulV3( &product.col1, tfrm0, &tfrm1->col1 );
+    vmathT3MulV3( &product.col2, tfrm0, &tfrm1->col2 );
+    vmathT3Copy( result, &product );
+}
+
+/* Element-by-element product of two 3x4 transforms, one column at a time. */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{
+    VmathTransform3 *const dst = result;
+    const VmathTransform3 *const lhs = tfrm0;
+    const VmathTransform3 *const rhs = tfrm1;
+    vmathV3MulPerElem( &dst->col0, &lhs->col0, &rhs->col0 );
+    vmathV3MulPerElem( &dst->col1, &lhs->col1, &rhs->col1 );
+    vmathV3MulPerElem( &dst->col2, &lhs->col2, &rhs->col2 );
+    vmathV3MulPerElem( &dst->col3, &lhs->col3, &rhs->col3 );
+}
+
+/* Sets *result to the identity transform: unit axes in the first three
+ * columns and a translation built from the scalar 0. */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result )
+{
+    /* the four stores are independent of each other */
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeXAxis( &result->col0 );
+}
+
+/* Copies the three columns of a 3x3 matrix into the first three columns of
+ * the transform; the translation column is not touched. */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
+{
+    VmathTransform3 *const dst = result;
+    const VmathMatrix3 *const src = tfrm;
+    vmathV3Copy( &dst->col0, &src->col0 );
+    vmathV3Copy( &dst->col1, &src->col1 );
+    vmathV3Copy( &dst->col2, &src->col2 );
+}
+
+/* Builds a 3x3 matrix from the first three columns of the transform. */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *const c0 = &tfrm->col0;
+    const VmathVector3 *const c1 = &tfrm->col1;
+    const VmathVector3 *const c2 = &tfrm->col2;
+    vmathM3MakeFromCols( result, c0, c1, c2 );
+}
+
+/* Stores translateVec as the fourth column of the transform. */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{
+    VmathVector3 *const translationCol = &result->col3;
+    vmathV3Copy( translationCol, translateVec );
+}
+
+/* Copies the fourth column of the transform into *result. */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *const translationCol = &tfrm->col3;
+    vmathV3Copy( result, translationCol );
+}
+
+/* Builds a 3x4 rotation about the x axis by the given angle in radians.
+ * col0 = x axis, col1 = (0, cos, sin), col2 = (0, -sin, cos); the
+ * translation column is built from the scalar 0. */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
+{
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    /* sine and cosine of the angle, replicated in every lane */
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    vmathV3MakeXAxis( &result->col0 );
+    result->col1.vec128 = vec_sel( vec_sel( kZero, cosA, maskY ), sinA, maskZ );
+    result->col2.vec128 = vec_sel( vec_sel( kZero, negatef4(sinA), maskY ), cosA, maskZ );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/* Builds a 3x4 rotation about the y axis by the given angle in radians.
+ * col0 = (cos, 0, -sin), col1 = y axis, col2 = (sin, 0, cos); the
+ * translation column is built from the scalar 0. */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
+{
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    /* sine and cosine of the angle, replicated in every lane */
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    result->col0.vec128 = vec_sel( vec_sel( kZero, cosA, maskX ), negatef4(sinA), maskZ );
+    vmathV3MakeYAxis( &result->col1 );
+    result->col2.vec128 = vec_sel( vec_sel( kZero, sinA, maskX ), cosA, maskZ );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/* Builds a 3x4 rotation about the z axis by the given angle in radians.
+ * col0 = (cos, sin, 0), col1 = (-sin, cos, 0), col2 = z axis; the
+ * translation column is built from the scalar 0. */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
+{
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    /* sine and cosine of the angle, replicated in every lane */
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    result->col0.vec128 = vec_sel( vec_sel( kZero, cosA, maskX ), sinA, maskY );
+    result->col1.vec128 = vec_sel( vec_sel( kZero, negatef4(sinA), maskX ), cosA, maskY );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
+{   /* Builds a 3x4 rotation transform from three angles (radians) stored
+     * in the x, y and z lanes of radiansXYZ. Sines and cosines of all
+     * angles are computed in one SIMD call and then shuffled into
+     * per-axis factor vectors (X*, Y*, Z*) whose products form the three
+     * rotation columns. The translation column is built from the scalar 0.
+     * NOTE(review): the lane layout produced by _VECTORMATH_PERM_BBYX is
+     * defined outside this chunk - confirm against its definition. */
+    VmathVector4 tmpV4_0;
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f ); /* fourth angle is forced to 0 */
+    angles = tmpV4_0.vec128;
+    sincosf4( angles, &s, &c ); /* per-lane sine and cosine of every angle */
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s ); /* (c.z, s.z, c.w, s.w) */
+    Z1 = vec_mergel( negS, c ); /* (-s.z, c.z, -s.w, c.w) */
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F ); /* clear the bits selected by the 0x000F mask */
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    X0 = vec_splat( s, 0 ); /* sin of the x angle in every lane */
+    X1 = vec_splat( c, 0 ); /* cos of the x angle in every lane */
+    tmp = vec_madd( Z0, Y1, zero ); /* shared by col1 and col2 */
+    result->col0.vec128 = vec_madd( Z0, Y0, zero );
+    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ); /* Z1*X1 + tmp*X0 */
+    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ); /* tmp*X1 - Z1*X0 */
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/* Builds a 3x4 rotation about an axis: the 3x3 part comes from
+ * vmathM3MakeRotationAxis and the translation is built from the scalar 0. */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
+{
+    VmathVector3 noTranslation;
+    VmathMatrix3 rotation;
+    /* both temporaries are locals, so the order of these two is free */
+    vmathV3MakeFromScalar( &noTranslation, 0.0f );
+    vmathM3MakeRotationAxis( &rotation, radians, unitVec );
+    vmathT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+/* Builds a 3x4 rotation from a quaternion: the 3x3 part comes from
+ * vmathM3MakeFromQ and the translation is built from the scalar 0. */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
+{
+    VmathVector3 noTranslation;
+    VmathMatrix3 rotation;
+    /* both temporaries are locals, so the order of these two is free */
+    vmathV3MakeFromScalar( &noTranslation, 0.0f );
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+/* Builds a 3x4 scale transform: column i keeps only lane i of scaleVec, all
+ * other lanes are zero; the translation is built from the scalar 0.
+ * scaleVec is re-read for each column. */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
+{
+    vec_uint4 laneX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 laneY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 laneZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 kZero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    result->col0.vec128 = vec_sel( kZero, scaleVec->vec128, laneX );
+    result->col1.vec128 = vec_sel( kZero, scaleVec->vec128, laneY );
+    result->col2.vec128 = vec_sel( kZero, scaleVec->vec128, laneZ );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/* Scales the first three columns of tfrm by the x, y and z components of
+ * scaleVec respectively and copies the translation column unchanged.
+ * Each scale component is read just before the column it applies to. */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
+{
+    float factor;
+    factor = vmathV3GetX( scaleVec );
+    vmathV3ScalarMul( &result->col0, &tfrm->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV3ScalarMul( &result->col1, &tfrm->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV3ScalarMul( &result->col2, &tfrm->col2, factor );
+    vmathV3Copy( &result->col3, &tfrm->col3 );
+}
+
+/* Multiplies every column of tfrm, including the translation, element by
+ * element with scaleVec, so each row is scaled by the matching component. */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
+{
+    VmathTransform3 *const dst = result;
+    const VmathTransform3 *const src = tfrm;
+    vmathV3MulPerElem( &dst->col0, &src->col0, scaleVec );
+    vmathV3MulPerElem( &dst->col1, &src->col1, scaleVec );
+    vmathV3MulPerElem( &dst->col2, &src->col2, scaleVec );
+    vmathV3MulPerElem( &dst->col3, &src->col3, scaleVec );
+}
+
+/* Builds a pure translation: unit axes in the first three columns and
+ * translateVec in the fourth. */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{
+    VmathTransform3 *const dst = result;
+    vmathV3MakeXAxis( &dst->col0 );
+    vmathV3MakeYAxis( &dst->col1 );
+    vmathV3MakeZAxis( &dst->col2 );
+    /* translation is written last, after the axes */
+    vmathV3Copy( &dst->col3, translateVec );
+}
+
+/* Column-wise select between two 3x4 transforms: every column is produced
+ * by vmathV3Select with the same select1 flag. */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
+{
+    const unsigned int pick = select1;
+    VmathTransform3 *const dst = result;
+    vmathV3Select( &dst->col0, &tfrm0->col0, &tfrm1->col0, pick );
+    vmathV3Select( &dst->col1, &tfrm0->col1, &tfrm1->col1, pick );
+    vmathV3Select( &dst->col2, &tfrm0->col2, &tfrm1->col2, pick );
+    vmathV3Select( &dst->col3, &tfrm0->col3, &tfrm1->col3, pick );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Debug helper: prints the three rows of a 3x4 transform, one vmathV4Print
+ * call per row, in row order 0..2. */
+static inline void vmathT3Print( const VmathTransform3 *tfrm )
+{
+    VmathVector4 row;
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathT3GetRow( &row, tfrm, i );
+        vmathV4Print( &row );
+    }
+}
+
+/* Debug helper: prints "<name>:" on its own line, then the transform rows. */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
+{
+    const char *const label = name;
+    printf("%s:\n", label);
+    vmathT3Print( tfrm );
+}
+
+#endif
+
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
+{   /* Converts a 3x3 matrix to a quaternion without branching.
+     * Four candidate quaternions are computed, one per case listed below,
+     * and the final answer is picked with vector compares and selects.
+     * NOTE(review): presumably tfrm must be a pure rotation matrix - the
+     * code does not check. The lane layouts of the _VECTORMATH_PERM_*
+     * constants are defined outside this chunk; the comments below follow
+     * the variable names. */
+    vec_float4 res;
+    vec_float4 col0, col1, col2;
+    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
+    vec_float4 radicand, invSqrt, scale;
+    vec_float4 res0, res1, res2, res3;
+    vec_float4 xx, yy, zz;
+    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
+    vec_uint4 select_y = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
+    vec_uint4 select_w = _VECTORMATH_MASK_0x000F;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    col0 = tfrm->col0.vec128;
+    col1 = tfrm->col1.vec128;
+    col2 = tfrm->col2.vec128;
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    xx_yy = vec_sel( col0, col1, select_y ); /* col0 with its y lane replaced by col1.y */
+    xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX ); /* three rotations of the diagonal elements */
+    yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
+    zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
+
+    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); /* sum of the three diagonal vectors */
+    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); /* first minus the other two */
+    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); /* 1 + diagDiff in x,y,z lanes; 1 + diagSum in w */
+    invSqrt = rsqrtf4( radicand );
+
+    zy_xz_yx = vec_sel( col0, col1, select_z ); /* gather off-diagonal elements */
+    zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
+    yz_zx_xy = vec_sel( col0, col1, select_x );
+    yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
+
+    sum = vec_add( zy_xz_yx, yz_zx_xy );
+    diff = vec_sub( zy_xz_yx, yz_zx_xy );
+
+    scale = vec_madd( invSqrt, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ); /* 0.5 / sqrt(radicand), per lane */
+    res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
+    res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
+    res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
+    res3 = diff;
+    res0 = vec_sel( res0, radicand, select_x ); /* candidate i carries the radicand in lane i */
+    res1 = vec_sel( res1, radicand, select_y );
+    res2 = vec_sel( res2, radicand, select_z );
+    res3 = vec_sel( res3, radicand, select_w );
+    res0 = vec_madd( res0, vec_splat( scale, 0 ), zero ); /* candidate i is scaled by lane i of scale */
+    res1 = vec_madd( res1, vec_splat( scale, 1 ), zero );
+    res2 = vec_madd( res2, vec_splat( scale, 2 ), zero );
+    res3 = vec_madd( res3, vec_splat( scale, 3 ), zero );
+
+    /* determine case and select answer */
+
+    xx = vec_splat( col0, 0 );
+    yy = vec_splat( col1, 1 );
+    zz = vec_splat( col2, 2 );
+    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) ); /* res1 when yy > xx, else res0 */
+    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) ); /* res2 when zz exceeds both xx and yy */
+    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), zero ) ); /* res3 overrides when lane 0 of diagSum is positive */
+    result->vec128 = res;
+}
+
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
+{   /* Outer product of two 3-vectors: each column of *result is *tfrm0 passed through vmathV3ScalarMul with one component of *tfrm1. */
+    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );   /* NOTE(review): columns are written one at a time while the inputs are still being read, so the inputs presumably must not point into *result - confirm */
+    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
+    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
+}
+
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
+{   /* Outer product of two 4-vectors: each column of *result is *tfrm0 passed through vmathV4ScalarMul with one component of *tfrm1. */
+    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );   /* NOTE(review): columns are written one at a time while the inputs are still being read, so the inputs presumably must not point into *result - confirm */
+    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
+    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
+    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
+}
+
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{   /* Row vector times matrix: word i (x,y,z) of *result = vec.x*col_i.x + vec.y*col_i.y + vec.z*col_i.z; the w word is not meaningful. */
+    const vec_float4 c0 = mat->col0.vec128, c1 = mat->col1.vec128, c2 = mat->col2.vec128;
+    const vec_float4 v = vec->vec128;   /* loaded up front; *result is only written by the final statement */
+    /* broadcast each component of the input vector across a full register */
+    const vec_float4 vx = vec_splat( v, 0 ), vy = vec_splat( v, 1 ), vz = vec_splat( v, 2 );
+    /* interleave col0 with col2, then weave in col1 to obtain the matrix rows (a 3x3 transpose) */
+    const vec_float4 hi02 = vec_mergeh( c0, c2 );
+    const vec_float4 lo02 = vec_mergel( c0, c2 );
+    const vec_float4 row0 = vec_mergeh( hi02, c1 );
+    const vec_float4 row1 = vec_perm( hi02, c1, _VECTORMATH_PERM_ZBWX );
+    const vec_float4 row2 = vec_perm( lo02, c1, _VECTORMATH_PERM_XCYX );
+    /* accumulate row0*x + row1*y + row2*z with vec_madd, starting from zero */
+    vec_float4 acc = vec_madd( row0, vx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    acc = vec_madd( row1, vy, acc );
+    result->vec128 = vec_madd( row2, vz, acc );
+}
+
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
+{   /* Builds the cross-product matrix of *vec, so that multiplying *result by a vector v gives cross( *vec, v ). */
+    /* With vec = (x, y, z): col0 = (0, z, -y), col1 = (-z, 0, x), col2 = (y, -x, 0). */
+    const vec_float4 v = vec->vec128;
+    const vec_float4 negV = negatef4( v );
+    /* gather the signed components of each column; the diagonal word holds a placeholder for now */
+    const vec_float4 skew0 = vec_perm( v, negV, _VECTORMATH_PERM_XZBX );
+    const vec_float4 skew1 = vec_perm( v, negV, _VECTORMATH_PERM_CXXX );
+    const vec_float4 skew2 = vec_perm( v, negV, _VECTORMATH_PERM_YAXX );
+    /* zero each column's diagonal word via and-with-complement (masks assumed to cover the x / y / z word respectively) */
+    result->col0.vec128 = vec_andc( skew0, (vec_float4)_VECTORMATH_MASK_0xF000 );
+    result->col1.vec128 = vec_andc( skew1, (vec_float4)_VECTORMATH_MASK_0x0F00 );
+    result->col2.vec128 = vec_andc( skew2, (vec_float4)_VECTORMATH_MASK_0x00F0 );
+}
+
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{   /* Applies vmathV3Cross( *vec, . ) to each column of *mat and assembles the three outputs into *result. */
+    VmathVector3 crossCol0, crossCol1, crossCol2;   /* temporaries: every column of *mat is read before *result is written */
+    vmathV3Cross( &crossCol0, vec, &mat->col0 );
+    vmathV3Cross( &crossCol1, vec, &mat->col1 );
+    vmathV3Cross( &crossCol2, vec, &mat->col2 );
+    vmathM3MakeFromCols( result, &crossCol0, &crossCol1, &crossCol2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos_v.h
index f8f869bb8..47c683abe 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos_v.h
@@ -1,1026 +1,1026 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_V_C_H
-#define _VECTORMATH_MAT_AOS_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
-#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
-#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
-#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
-{
-    vmathM3SetCol0(result, &_col0);
-}
-
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
-{
-    vmathM3SetCol1(result, &_col1);
-}
-
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
-{
-    vmathM3SetCol2(result, &_col2);
-}
-
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
-{
-    vmathM3SetCol(result, col, &vec);
-}
-
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
-{
-    vmathM3SetRow(result, row, &vec);
-}
-
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
-{
-    vmathM3SetElem(result, col, row, val);
-}
-
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
-{
-    return vmathM3GetElem(&mat, col, row);
-}
-
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
-{
-    VmathVector3 result;
-    vmathM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
-{
-    VmathVector3 result;
-    vmathM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM3Determinant_V( VmathMatrix3 mat )
-{
-    return vmathM3Determinant(&mat);
-}
-
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeIdentity_V( )
-{
-    VmathMatrix3 result;
-    vmathM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
-{
-    VmathMatrix3 result;
-    vmathM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print_V( VmathMatrix3 mat )
-{
-    vmathM3Print(&mat);
-}
-
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
-{
-    vmathM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
-{
-    vmathM4SetCol0(result, &_col0);
-}
-
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
-{
-    vmathM4SetCol1(result, &_col1);
-}
-
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
-{
-    vmathM4SetCol2(result, &_col2);
-}
-
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
-{
-    vmathM4SetCol3(result, &_col3);
-}
-
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
-{
-    vmathM4SetCol(result, col, &vec);
-}
-
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
-{
-    vmathM4SetRow(result, row, &vec);
-}
-
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
-{
-    vmathM4SetElem(result, col, row, val);
-}
-
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
-{
-    return vmathM4GetElem(&mat, col, row);
-}
-
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
-{
-    VmathVector4 result;
-    vmathM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
-{
-    VmathVector4 result;
-    vmathM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM4Determinant_V( VmathMatrix4 mat )
-{
-    return vmathM4Determinant(&mat);
-}
-
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeIdentity_V( )
-{
-    VmathMatrix4 result;
-    vmathM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
-{
-    vmathM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
-{
-    VmathMatrix3 result;
-    vmathM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
-{
-    vmathM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
-{
-    VmathVector3 result;
-    vmathM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
-{
-    VmathMatrix4 result;
-    vmathM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print_V( VmathMatrix4 mat )
-{
-    vmathM4Print(&mat);
-}
-
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
-{
-    vmathM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
-{
-    vmathT3SetCol0(result, &_col0);
-}
-
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
-{
-    vmathT3SetCol1(result, &_col1);
-}
-
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
-{
-    vmathT3SetCol2(result, &_col2);
-}
-
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
-{
-    vmathT3SetCol3(result, &_col3);
-}
-
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
-{
-    vmathT3SetCol(result, col, &vec);
-}
-
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
-{
-    vmathT3SetRow(result, row, &vec);
-}
-
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
-{
-    vmathT3SetElem(result, col, row, val);
-}
-
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
-{
-    return vmathT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
-{
-    VmathVector3 result;
-    vmathT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
-{
-    VmathVector4 result;
-    vmathT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeIdentity_V( )
-{
-    VmathTransform3 result;
-    vmathT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
-{
-    vmathT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
-{
-    VmathMatrix3 result;
-    vmathT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
-{
-    vmathT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
-{
-    VmathTransform3 result;
-    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print_V( VmathTransform3 tfrm )
-{
-    vmathT3Print(&tfrm);
-}
-
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
-{
-    vmathT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
-{
-    VmathQuat result;
-    vmathQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
-{
-    VmathMatrix3 result;
-    vmathV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_V_C_H
+#define _VECTORMATH_MAT_AOS_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * Shuffle masks: words are labeled [x,y,z,w] and [a,b,c,d]; each mask name spells the four words it selects, in order
+ */
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
+{
+    vmathM3SetCol0(result, &_col0);
+}
+
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
+{
+    vmathM3SetCol1(result, &_col1);
+}
+
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
+{
+    vmathM3SetCol2(result, &_col2);
+}
+
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
+{
+    vmathM3SetCol(result, col, &vec);
+}
+
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
+{
+    vmathM3SetRow(result, row, &vec);
+}
+
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
+{
+    vmathM3SetElem(result, col, row, val);
+}
+
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
+{
+    return vmathM3GetElem(&mat, col, row);
+}
+
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
+{
+    VmathVector3 result;
+    vmathM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
+{
+    VmathVector3 result;
+    vmathM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM3Determinant_V( VmathMatrix3 mat )
+{
+    return vmathM3Determinant(&mat);
+}
+
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void ) /* by-value form of vmathM3MakeIdentity; (void) makes this a true prototype in C */
+{
+    VmathMatrix3 result; /* filled in by the pointer-based call below */
+    vmathM3MakeIdentity(&result);
+    return result; /* returned by value */
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
+{
+    VmathMatrix3 result;
+    vmathM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM3Print_V( VmathMatrix3 mat )
+{
+    vmathM3Print(&mat);
+}
+
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
+{
+    vmathM3Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromT3(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
+{
+    vmathM4SetCol0(result, &_col0);
+}
+
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
+{
+    vmathM4SetCol1(result, &_col1);
+}
+
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
+{
+    vmathM4SetCol2(result, &_col2);
+}
+
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
+{
+    vmathM4SetCol3(result, &_col3);
+}
+
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
+{
+    vmathM4SetCol(result, col, &vec);
+}
+
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
+{
+    vmathM4SetRow(result, row, &vec);
+}
+
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
+{
+    vmathM4SetElem(result, col, row, val);
+}
+
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
+{
+    return vmathM4GetElem(&mat, col, row);
+}
+
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
+{
+    VmathVector4 result;
+    vmathM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
+{
+    VmathVector4 result;
+    vmathM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM4Determinant_V( VmathMatrix4 mat )
+{
+    return vmathM4Determinant(&mat);
+}
+
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathM4MulP3(&result, &mat, &pnt);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulT3(&result, &mat, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void ) /* by-value form of vmathM4MakeIdentity; (void) makes this a true prototype in C */
+{
+    VmathMatrix4 result; /* filled in by the pointer-based call below */
+    vmathM4MakeIdentity(&result);
+    return result; /* returned by value */
+}
+
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
+{
+    vmathM4SetUpper3x3(result, &mat3);
+}
+
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
+{
+    VmathMatrix3 result;
+    vmathM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
+{
+    vmathM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
+{
+    VmathVector3 result;
+    vmathM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
+{
+    VmathMatrix4 result;
+    vmathM4Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM4Print_V( VmathMatrix4 mat )
+{
+    vmathM4Print(&mat);
+}
+
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
+{
+    vmathM4Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
+{
+    vmathT3SetCol0(result, &_col0);
+}
+
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
+{
+    vmathT3SetCol1(result, &_col1);
+}
+
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
+{
+    vmathT3SetCol2(result, &_col2);
+}
+
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
+{
+    vmathT3SetCol3(result, &_col3);
+}
+
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
+{
+    vmathT3SetCol(result, col, &vec);
+}
+
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
+{
+    vmathT3SetRow(result, row, &vec);
+}
+
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
+{
+    vmathT3SetElem(result, col, row, val);
+}
+
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
+{
+    return vmathT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol0(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol1(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol2(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
+{
+    VmathVector3 result;
+    vmathT3GetCol(&result, &tfrm, col);
+    return result;
+}
+
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
+{
+    VmathVector4 result;
+    vmathT3GetRow(&result, &tfrm, row);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3Inverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3OrthoInverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3AbsPerElem(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathT3MulV3(&result, &tfrm, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathT3MulP3(&result, &tfrm, &pnt);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3Mul(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeIdentity_V( void ) /* by-value form of vmathT3MakeIdentity; (void) makes this a true prototype in C */
+{
+    VmathTransform3 result; /* filled in by the pointer-based call below */
+    vmathT3MakeIdentity(&result);
+    return result; /* returned by value */
+}
+
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
+{
+    vmathT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
+{
+    VmathMatrix3 result;
+    vmathT3GetUpper3x3(&result, &tfrm);
+    return result;
+}
+
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
+{
+    vmathT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetTranslation(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3AppendScale(&result, &tfrm, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3PrependScale(&result, &scaleVec, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
+{
+    VmathTransform3 result;
+    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathT3Print_V( VmathTransform3 tfrm )
+{
+    vmathT3Print(&tfrm);
+}
+
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
+{
+    vmathT3Prints(&tfrm, name);
+}
+
+#endif
+
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
+{
+    VmathQuat result;
+    vmathQMakeFromM3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
+{
+    VmathMatrix3 result;
+    vmathV3Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathV4Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathV3RowMul(&result, &vec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrix(&result, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrixMul(&result, &vec, &mat);
+    return result;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa.h
index 25b7af0aa..3b673c4eb 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa.h
@@ -1,1493 +1,1493 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_C_H
-#define _VECTORMATH_MAT_SOA_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( &result->col0, &mat->col0 );
-    vmathSoaV3Copy( &result->col1, &mat->col1 );
-    vmathSoaV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar )
-{
-    vmathSoaV3MakeFromScalar( &result->col0, scalar );
-    vmathSoaV3MakeFromScalar( &result->col1, scalar );
-    vmathSoaV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
-{
-    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat->x;
-    qy = unitQuat->y;
-    qz = unitQuat->z;
-    qw = unitQuat->w;
-    qx2 = vec_add( qx, qx );
-    qy2 = vec_add( qy, qy );
-    qz2 = vec_add( qz, qz );
-    qxqx2 = vec_madd( qx, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqy2 = vec_madd( qx, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqz2 = vec_madd( qx, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqw2 = vec_madd( qw, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqy2 = vec_madd( qy, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqz2 = vec_madd( qy, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqw2 = vec_madd( qw, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qzqz2 = vec_madd( qz, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qzqw2 = vec_madd( qw, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col0, vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qyqy2 ), qzqz2 ), vec_add( qxqy2, qzqw2 ), vec_sub( qxqz2, qyqw2 ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_sub( qxqy2, qzqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qzqz2 ), vec_add( qyqz2, qxqw2 ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_add( qxqz2, qyqw2 ), vec_sub( qyqz2, qxqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qyqy2 ) );
-}
-
-static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-    vmathSoaV3Copy( &result->col1, _col1 );
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathSoaV3MakeFromAos( &result->col0, &mat->col0 );
-    vmathSoaV3MakeFromAos( &result->col1, &mat->col1 );
-    vmathSoaV3MakeFromAos( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 )
-{
-    vmathSoaV3MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
-    vmathSoaV3MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
-    vmathSoaV3MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
-}
-
-static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
-{
-    vmathSoaV3Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV3Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV3Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-}
-
-static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col1 )
-{
-    vmathSoaV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3SetElem( &result->col0, row, vmathSoaV3GetElem( vec, 0 ) );
-    vmathSoaV3SetElem( &result->col1, row, vmathSoaV3GetElem( vec, 1 ) );
-    vmathSoaV3SetElem( &result->col2, row, vmathSoaV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3GetCol( &tmpV3_0, result, col );
-    vmathSoaV3SetElem( &tmpV3_0, row, val );
-    vmathSoaM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3GetCol( &tmpV3_0, mat, col );
-    return vmathSoaV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col )
-{
-    vmathSoaV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row )
-{
-    vmathSoaV3MakeFromElems( result, vmathSoaV3GetElem( &mat->col0, row ), vmathSoaV3GetElem( &mat->col1, row ), vmathSoaV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaMatrix3 tmpResult;
-    vmathSoaV3MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x );
-    vmathSoaV3MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y );
-    vmathSoaV3MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z );
-    vmathSoaM3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmp0, tmp1, tmp2;
-    vec_float4 detinv;
-    vmathSoaV3Cross( &tmp0, &mat->col1, &mat->col2 );
-    vmathSoaV3Cross( &tmp1, &mat->col2, &mat->col0 );
-    vmathSoaV3Cross( &tmp2, &mat->col0, &mat->col1 );
-    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot( &mat->col2, &tmp2 ) );
-    vmathSoaV3MakeFromElems( &result->col0, vec_madd( tmp0.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_madd( tmp0.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_madd( tmp0.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathSoaV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Neg( &result->col0, &mat->col0 );
-    vmathSoaV3Neg( &result->col1, &mat->col1 );
-    vmathSoaV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathSoaV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathSoaV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar )
-{
-    vmathSoaV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathSoaV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathSoaV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_add( vec_add( vec_madd( mat->col0.x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_add( vec_add( vec_madd( mat->col0.y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_add( vec_add( vec_madd( mat->col0.z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    VmathSoaMatrix3 tmpResult;
-    vmathSoaM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathSoaM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathSoaM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathSoaM3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s );
-    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c );
-}
-
-static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeFromElems( &result->col2, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col0, vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    vmathSoaV3MakeFromElems( &result->col0, vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ) );
-}
-
-static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
-{
-    vmathSoaM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z );
-}
-
-static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
-}
-
-static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathSoaV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathSoaV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 )
-{
-    vmathSoaV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathSoaV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathSoaV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat )
-{
-    VmathMatrix3 mat0, mat1, mat2, mat3;
-    vmathSoaM3Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathM3Print( &mat0 );
-    printf("slot 1:\n");
-    vmathM3Print( &mat1 );
-    printf("slot 2:\n");
-    vmathM3Print( &mat2 );
-    printf("slot 3:\n");
-    vmathM3Print( &mat3 );
-}
-
-static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaM3Print( mat );
-}
-
-#endif
-
-static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( &result->col0, &mat->col0 );
-    vmathSoaV4Copy( &result->col1, &mat->col1 );
-    vmathSoaV4Copy( &result->col2, &mat->col2 );
-    vmathSoaV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar )
-{
-    vmathSoaV4MakeFromScalar( &result->col0, scalar );
-    vmathSoaV4MakeFromScalar( &result->col1, scalar );
-    vmathSoaV4MakeFromScalar( &result->col2, scalar );
-    vmathSoaV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat )
-{
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, &mat->col3, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0, const VmathSoaVector4 *_col1, const VmathSoaVector4 *_col2, const VmathSoaVector4 *_col3 )
-{
-    vmathSoaV4Copy( &result->col0, _col0 );
-    vmathSoaV4Copy( &result->col1, _col1 );
-    vmathSoaV4Copy( &result->col2, _col2 );
-    vmathSoaV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
-{
-    VmathSoaMatrix3 mat;
-    vmathSoaM3MakeFromQ( &mat, unitQuat );
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat.col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat.col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat.col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathSoaV4MakeFromAos( &result->col0, &mat->col0 );
-    vmathSoaV4MakeFromAos( &result->col1, &mat->col1 );
-    vmathSoaV4MakeFromAos( &result->col2, &mat->col2 );
-    vmathSoaV4MakeFromAos( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 )
-{
-    vmathSoaV4MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
-    vmathSoaV4MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
-    vmathSoaV4MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
-    vmathSoaV4MakeFrom4Aos( &result->col3, &mat0->col3, &mat1->col3, &mat2->col3, &mat3->col3 );
-}
-
-static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
-{
-    vmathSoaV4Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV4Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV4Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-    vmathSoaV4Get4Aos( &mat->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
-}
-
-static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0 )
-{
-    vmathSoaV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col1 )
-{
-    vmathSoaV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col2 )
-{
-    vmathSoaV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col3 )
-{
-    vmathSoaV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec )
-{
-    vmathSoaV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec )
-{
-    vmathSoaV4SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
-    vmathSoaV4SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
-    vmathSoaV4SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
-    vmathSoaV4SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector4 tmpV3_0;
-    vmathSoaM4GetCol( &tmpV3_0, result, col );
-    vmathSoaV4SetElem( &tmpV3_0, row, val );
-    vmathSoaM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row )
-{
-    VmathSoaVector4 tmpV4_0;
-    vmathSoaM4GetCol( &tmpV4_0, mat, col );
-    return vmathSoaV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col )
-{
-    vmathSoaV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row )
-{
-    vmathSoaV4MakeFromElems( result, vmathSoaV4GetElem( &mat->col0, row ), vmathSoaV4GetElem( &mat->col1, row ), vmathSoaV4GetElem( &mat->col2, row ), vmathSoaV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaMatrix4 tmpResult;
-    vmathSoaV4MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
-    vmathSoaV4MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
-    vmathSoaV4MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
-    vmathSoaV4MakeFromElems( &tmpResult.col3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaVector4 res0, res1, res2, res3;
-    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetX( &res0, vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV4SetY( &res0, vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV4SetZ( &res0, vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV4SetW( &res0, vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    detInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_add( vec_add( vec_add( vec_madd( mA, res0.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, res0.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, res0.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, res0.w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV4SetX( &res1, vec_madd( mI, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetY( &res1, vec_madd( mM, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetZ( &res1, vec_madd( mA, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetW( &res1, vec_madd( mE, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetX( &res3, vec_madd( mI, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetY( &res3, vec_madd( mM, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetZ( &res3, vec_madd( mA, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetW( &res3, vec_madd( mE, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetX( &res2, vec_madd( mI, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetY( &res2, vec_madd( mM, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetZ( &res2, vec_madd( mA, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetW( &res2, vec_madd( mE, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp0 = vec_sub( vec_madd( mI, mB, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mJ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mM, mF, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mN, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mI, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mM, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mI, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mM, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetX( &res2, vec_add( vec_sub( vec_madd( mL, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.x ) );
-    vmathSoaV4SetY( &res2, vec_add( vec_sub( vec_madd( mP, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.y ) );
-    vmathSoaV4SetZ( &res2, vec_sub( vec_sub( vec_madd( mB, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mD, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.z ) );
-    vmathSoaV4SetW( &res2, vec_sub( vec_sub( vec_madd( mF, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mH, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.w ) );
-    vmathSoaV4SetX( &res3, vec_add( vec_sub( vec_madd( mJ, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mK, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.x ) );
-    vmathSoaV4SetY( &res3, vec_add( vec_sub( vec_madd( mN, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mO, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.y ) );
-    vmathSoaV4SetZ( &res3, vec_sub( vec_sub( vec_madd( mC, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.z ) );
-    vmathSoaV4SetW( &res3, vec_sub( vec_sub( vec_madd( mG, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.w ) );
-    vmathSoaV4SetX( &res1, vec_sub( vec_sub( vec_madd( mK, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.x ) );
-    vmathSoaV4SetY( &res1, vec_sub( vec_sub( vec_madd( mO, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.y ) );
-    vmathSoaV4SetZ( &res1, vec_add( vec_sub( vec_madd( mD, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.z ) );
-    vmathSoaV4SetW( &res1, vec_add( vec_sub( vec_madd( mH, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.w ) );
-    vmathSoaV4ScalarMul( &result->col0, &res0, detInv );
-    vmathSoaV4ScalarMul( &result->col1, &res1, detInv );
-    vmathSoaV4ScalarMul( &result->col2, &res2, detInv );
-    vmathSoaV4ScalarMul( &result->col3, &res3, detInv );
-}
-
-static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaTransform3 affineMat, tmpT3_0;
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathSoaV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathSoaT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathSoaV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathSoaT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathSoaV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathSoaT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathSoaV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathSoaT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathSoaT3Inverse( &tmpT3_0, &affineMat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaTransform3 affineMat, tmpT3_0;
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathSoaV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathSoaT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathSoaV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathSoaT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathSoaV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathSoaT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathSoaV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathSoaT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathSoaT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat )
-{
-    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dx = vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dy = vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dz = vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dw = vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return vec_add( vec_add( vec_add( vec_madd( mA, dx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, dy, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, dz, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, dw, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Neg( &result->col0, &mat->col0 );
-    vmathSoaV4Neg( &result->col1, &mat->col1 );
-    vmathSoaV4Neg( &result->col2, &mat->col2 );
-    vmathSoaV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathSoaV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathSoaV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathSoaV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar )
-{
-    vmathSoaV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathSoaV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathSoaV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathSoaV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = vec_add( vec_add( vec_add( vec_madd( mat->col0.x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col3.x, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_add( vec_add( vec_add( vec_madd( mat->col0.y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col3.y, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_add( vec_add( vec_add( vec_madd( mat->col0.z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col3.z, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpW = vec_add( vec_add( vec_add( vec_madd( mat->col0.w, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.w, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.w, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col3.w, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4MakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec )
-{
-    result->x = vec_add( vec_add( vec_madd( mat->col0.x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->y = vec_add( vec_add( vec_madd( mat->col0.y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->z = vec_add( vec_add( vec_madd( mat->col0.z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->w = vec_add( vec_add( vec_madd( mat->col0.w, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.w, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.w, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt )
-{
-    result->x = vec_add( vec_add( vec_add( vec_madd( mat->col0.x, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.x );
-    result->y = vec_add( vec_add( vec_add( vec_madd( mat->col0.y, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.y );
-    result->z = vec_add( vec_add( vec_add( vec_madd( mat->col0.z, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.z );
-    result->w = vec_add( vec_add( vec_add( vec_madd( mat->col0.w, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.w, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.w, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.w );
-}
-
-static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    VmathSoaMatrix4 tmpResult;
-    vmathSoaM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathSoaM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathSoaM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathSoaM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm1 )
-{
-    VmathSoaMatrix4 tmpResult;
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathSoaM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathSoaM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathSoaM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result )
-{
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 )
-{
-    vmathSoaV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathSoaV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathSoaV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4GetXYZ( &result->col0, &mat->col0 );
-    vmathSoaV4GetXYZ( &result->col1, &mat->col1 );
-    vmathSoaV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeFromElems( &result->col0, c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeFromElems( &result->col2, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeFromElems( &result->col0, c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col0, vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    vmathSoaV4MakeFromElems( &result->col0, vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat )
-{
-    VmathSoaTransform3 tmpT3_0;
-    vmathSoaT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV4MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV4ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV4ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV4ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
-    vmathSoaV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaVector4 scale4;
-    vmathSoaV4MakeFromV3Scalar( &scale4, scaleVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    vmathSoaV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathSoaV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathSoaV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathSoaV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec )
-{
-    VmathSoaMatrix4 m4EyeFrame;
-    VmathSoaVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathSoaVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathSoaV3Normalize( &v3Y, upVec );
-    vmathSoaP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathSoaV3Normalize( &v3Z, &tmpV3_0 );
-    vmathSoaV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathSoaV3Normalize( &v3X, &tmpV3_1 );
-    vmathSoaV3Cross( &v3Y, &v3Z, &v3X );
-    vmathSoaV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathSoaV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathSoaV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathSoaV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathSoaM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathSoaM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 f, rangeInv;
-    f = tanf4( vec_sub( ((vec_float4){_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2}), vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), fovyRadians, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    rangeInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    vmathSoaV4MakeFromElems( &result->col0, divf4( f, aspect ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), f, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_add( zNear, zFar ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) );
-    vmathSoaV4MakeFromElems( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( vec_madd( zNear, zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = vec_add( right, left );
-    sum_tb = vec_add( top, bottom );
-    sum_nf = vec_add( zNear, zFar );
-    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );
-    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );
-    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    n2 = vec_add( zNear, zNear );
-    vmathSoaV4MakeFromElems( &result->col0, vec_madd( n2, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( n2, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, vec_madd( sum_rl, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_tb, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) );
-    vmathSoaV4MakeFromElems( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( n2, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = vec_add( right, left );
-    sum_tb = vec_add( top, bottom );
-    sum_nf = vec_add( zNear, zFar );
-    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );
-    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );
-    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    vmathSoaV4MakeFromElems( &result->col0, vec_add( inv_rl, inv_rl ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_tb, inv_tb ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_nf, inv_nf ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col3, vec_madd( negatef4( sum_rl ), inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( negatef4( sum_tb ), inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 )
-{
-    vmathSoaV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathSoaV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathSoaV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathSoaV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat )
-{
-    VmathMatrix4 mat0, mat1, mat2, mat3;
-    vmathSoaM4Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathM4Print( &mat0 );
-    printf("slot 1:\n");
-    vmathM4Print( &mat1 );
-    printf("slot 2:\n");
-    vmathM4Print( &mat2 );
-    printf("slot 3:\n");
-    vmathM4Print( &mat3 );
-}
-
-static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaM4Print( mat );
-}
-
-#endif
-
-static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
-    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
-    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
-    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar )
-{
-    vmathSoaV3MakeFromScalar( &result->col0, scalar );
-    vmathSoaV3MakeFromScalar( &result->col1, scalar );
-    vmathSoaV3MakeFromScalar( &result->col2, scalar );
-    vmathSoaV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2, const VmathSoaVector3 *_col3 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-    vmathSoaV3Copy( &result->col1, _col1 );
-    vmathSoaV3Copy( &result->col2, _col2 );
-    vmathSoaV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaT3SetUpper3x3( result, tfrm );
-    vmathSoaT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathSoaT3SetUpper3x3( result, &tmpM3_0 );
-    vmathSoaT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathSoaV3MakeFromAos( &result->col0, &tfrm->col0 );
-    vmathSoaV3MakeFromAos( &result->col1, &tfrm->col1 );
-    vmathSoaV3MakeFromAos( &result->col2, &tfrm->col2 );
-    vmathSoaV3MakeFromAos( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 )
-{
-    vmathSoaV3MakeFrom4Aos( &result->col0, &tfrm0->col0, &tfrm1->col0, &tfrm2->col0, &tfrm3->col0 );
-    vmathSoaV3MakeFrom4Aos( &result->col1, &tfrm0->col1, &tfrm1->col1, &tfrm2->col1, &tfrm3->col1 );
-    vmathSoaV3MakeFrom4Aos( &result->col2, &tfrm0->col2, &tfrm1->col2, &tfrm2->col2, &tfrm3->col2 );
-    vmathSoaV3MakeFrom4Aos( &result->col3, &tfrm0->col3, &tfrm1->col3, &tfrm2->col3, &tfrm3->col3 );
-}
-
-static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
-{
-    vmathSoaV3Get4Aos( &tfrm->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV3Get4Aos( &tfrm->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV3Get4Aos( &tfrm->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-    vmathSoaV3Get4Aos( &tfrm->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
-}
-
-static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *_col1 )
-{
-    vmathSoaV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *_col3 )
-{
-    vmathSoaV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec )
-{
-    vmathSoaV3SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
-    vmathSoaV3SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
-    vmathSoaV3SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
-    vmathSoaV3SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaT3GetCol( &tmpV3_0, result, col );
-    vmathSoaV3SetElem( &tmpV3_0, row, val );
-    vmathSoaT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathSoaV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col )
-{
-    vmathSoaV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row )
-{
-    vmathSoaV4MakeFromElems( result, vmathSoaV3GetElem( &tfrm->col0, row ), vmathSoaV3GetElem( &tfrm->col1, row ), vmathSoaV3GetElem( &tfrm->col2, row ), vmathSoaV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    VmathSoaVector3 tmp0, tmp1, tmp2, inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vec_float4 detinv;
-    vmathSoaV3Cross( &tmp0, &tfrm->col1, &tfrm->col2 );
-    vmathSoaV3Cross( &tmp1, &tfrm->col2, &tfrm->col0 );
-    vmathSoaV3Cross( &tmp2, &tfrm->col0, &tfrm->col1 );
-    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot( &tfrm->col2, &tmp2 ) );
-    vmathSoaV3MakeFromElems( &inv0, vec_madd( tmp0.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &inv1, vec_madd( tmp0.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &inv2, vec_madd( tmp0.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3Copy( &result->col0, &inv0 );
-    vmathSoaV3Copy( &result->col1, &inv1 );
-    vmathSoaV3Copy( &result->col2, &inv2 );
-    vmathSoaV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathSoaV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathSoaV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathSoaV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathSoaV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathSoaV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathSoaV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    VmathSoaVector3 inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vmathSoaV3MakeFromElems( &inv0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
-    vmathSoaV3MakeFromElems( &inv1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
-    vmathSoaV3MakeFromElems( &inv2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
-    vmathSoaV3Copy( &result->col0, &inv0 );
-    vmathSoaV3Copy( &result->col1, &inv1 );
-    vmathSoaV3Copy( &result->col2, &inv2 );
-    vmathSoaV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathSoaV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathSoaV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathSoaV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathSoaV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathSoaV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathSoaV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathSoaV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathSoaV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathSoaV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_add( vec_add( vec_madd( tfrm->col0.x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_add( vec_add( vec_madd( tfrm->col0.y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_add( vec_add( vec_madd( tfrm->col0.z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.x, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.x, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.x, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), tfrm->col3.x );
-    tmpY = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.y, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.y, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.y, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), tfrm->col3.y );
-    tmpZ = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.z, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.z, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.z, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), tfrm->col3.z );
-    vmathSoaP3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
-{
-    VmathSoaTransform3 tmpResult;
-    VmathSoaPoint3 tmpP3_0, tmpP3_1;
-    vmathSoaT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathSoaT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathSoaT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathSoaT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathSoaV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathSoaT3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
-{
-    vmathSoaV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathSoaV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathSoaV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathSoaV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm )
-{
-    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
-    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
-    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s );
-    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeFromElems( &result->col2, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col0, vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathSoaV3MakeFromScalar( &tmpV3_0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathSoaV3MakeFromScalar( &tmpV3_0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3ScalarMul( &result->col0, &tfrm->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col1, &tfrm->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col2, &tfrm->col2, vmathSoaV3GetZ( scaleVec ) );
-    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathSoaV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathSoaV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathSoaV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 )
-{
-    vmathSoaV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathSoaV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathSoaV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathSoaV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm )
-{
-    VmathTransform3 mat0, mat1, mat2, mat3;
-    vmathSoaT3Get4Aos( tfrm, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathT3Print( &mat0 );
-    printf("slot 1:\n");
-    vmathT3Print( &mat1 );
-    printf("slot 2:\n");
-    vmathT3Print( &mat2 );
-    printf("slot 3:\n");
-    vmathT3Print( &mat3 );
-}
-
-static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *tfrm )
-{
-    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
-    vec_uint4 largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm->col0.x;
-    yx = tfrm->col0.y;
-    zx = tfrm->col0.z;
-    xy = tfrm->col1.x;
-    yy = tfrm->col1.y;
-    zy = tfrm->col1.z;
-    xz = tfrm->col2.x;
-    yz = tfrm->col2.y;
-    zz = tfrm->col2.z;
-
-    trace = vec_add( vec_add( xx, yy ), zz );
-
-    negTrace = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), trace );
-    ZgtX = (vec_uint4)vec_cmpgt( zz, xx );
-    ZgtY = (vec_uint4)vec_cmpgt( zz, yy );
-    YgtX = (vec_uint4)vec_cmpgt( yy, xx );
-    largestXorY = vec_andc( negTrace, vec_and( ZgtX, ZgtY ) );
-    largestYorZ = vec_and( negTrace, vec_or( YgtX, ZgtX ) );
-    largestZorX = vec_andc( negTrace, vec_andc( YgtX, ZgtY ) );
-    
-    zz = vec_sel( zz, negatef4(zz), largestXorY );
-    xy = vec_sel( xy, negatef4(xy), largestXorY );
-    xx = vec_sel( xx, negatef4(xx), largestYorZ );
-    yz = vec_sel( yz, negatef4(yz), largestYorZ );
-    yy = vec_sel( yy, negatef4(yy), largestZorX );
-    zx = vec_sel( zx, negatef4(zx), largestZorX );
-
-    radicand = vec_add( vec_add( vec_add( xx, yy ), zz ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    scale = vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( radicand ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-
-    tmpx = vec_madd( vec_sub( zy, yz ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpy = vec_madd( vec_sub( xz, zx ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpz = vec_madd( vec_sub( yx, xy ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpw = vec_madd( radicand, scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    qx = vec_sel( qx, tmpw, largestXorY );
-    qy = vec_sel( qy, tmpz, largestXorY );
-    qz = vec_sel( qz, tmpy, largestXorY );
-    qw = vec_sel( qw, tmpx, largestXorY );
-    tmpx = qx;
-    tmpz = qz;
-    qx = vec_sel( qx, qy, largestYorZ );
-    qy = vec_sel( qy, tmpx, largestYorZ );
-    qz = vec_sel( qz, qw, largestYorZ );
-    qw = vec_sel( qw, tmpz, largestYorZ );
-
-    result->x = qx;
-    result->y = qy;
-    result->z = qz;
-    result->w = qw;
-}
-
-static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *tfrm0, const VmathSoaVector3 *tfrm1 )
-{
-    vmathSoaV3ScalarMul( &result->col0, tfrm0, vmathSoaV3GetX( tfrm1 ) );
-    vmathSoaV3ScalarMul( &result->col1, tfrm0, vmathSoaV3GetY( tfrm1 ) );
-    vmathSoaV3ScalarMul( &result->col2, tfrm0, vmathSoaV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *tfrm0, const VmathSoaVector4 *tfrm1 )
-{
-    vmathSoaV4ScalarMul( &result->col0, tfrm0, vmathSoaV4GetX( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col1, tfrm0, vmathSoaV4GetY( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col2, tfrm0, vmathSoaV4GetZ( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col3, tfrm0, vmathSoaV4GetW( tfrm1 ) );
-}
-
-static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_add( vec_add( vec_madd( vec->x, mat->col0.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col0.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col0.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_add( vec_add( vec_madd( vec->x, mat->col1.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col1.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col1.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_add( vec_add( vec_madd( vec->x, mat->col2.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col2.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col2.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec->z, negatef4( vec->y ) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( vec->z ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec->x );
-    vmathSoaV3MakeFromElems( &result->col2, vec->y, negatef4( vec->x ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathSoaV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathSoaV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathSoaV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathSoaM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_C_H
+#define _VECTORMATH_MAT_SOA_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Copy( &result->col0, &mat->col0 );
+    vmathSoaV3Copy( &result->col1, &mat->col1 );
+    vmathSoaV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar )
+{
+    vmathSoaV3MakeFromScalar( &result->col0, scalar );
+    vmathSoaV3MakeFromScalar( &result->col1, scalar );
+    vmathSoaV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
+{
+    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat->x;
+    qy = unitQuat->y;
+    qz = unitQuat->z;
+    qw = unitQuat->w;
+    qx2 = vec_add( qx, qx );
+    qy2 = vec_add( qy, qy );
+    qz2 = vec_add( qz, qz );
+    qxqx2 = vec_madd( qx, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qxqy2 = vec_madd( qx, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qxqz2 = vec_madd( qx, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qxqw2 = vec_madd( qw, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qyqy2 = vec_madd( qy, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qyqz2 = vec_madd( qy, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qyqw2 = vec_madd( qw, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qzqz2 = vec_madd( qz, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qzqw2 = vec_madd( qw, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaV3MakeFromElems( &result->col0, vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qyqy2 ), qzqz2 ), vec_add( qxqy2, qzqw2 ), vec_sub( qxqz2, qyqw2 ) );
+    vmathSoaV3MakeFromElems( &result->col1, vec_sub( qxqy2, qzqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qzqz2 ), vec_add( qyqz2, qxqw2 ) );
+    vmathSoaV3MakeFromElems( &result->col2, vec_add( qxqz2, qyqw2 ), vec_sub( qyqz2, qxqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qyqy2 ) );
+}
+
+static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2 )
+{
+    vmathSoaV3Copy( &result->col0, _col0 );
+    vmathSoaV3Copy( &result->col1, _col1 );
+    vmathSoaV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vmathSoaV3MakeFromAos( &result->col0, &mat->col0 );
+    vmathSoaV3MakeFromAos( &result->col1, &mat->col1 );
+    vmathSoaV3MakeFromAos( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 )
+{
+    vmathSoaV3MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
+    vmathSoaV3MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
+    vmathSoaV3MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
+}
+
+static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
+{
+    vmathSoaV3Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV3Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV3Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+}
+
+static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0 )
+{
+    vmathSoaV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col1 )
+{
+    vmathSoaV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col2 )
+{
+    vmathSoaV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3Copy( (&result->col0 + col), vec );
+}
+
+static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3SetElem( &result->col0, row, vmathSoaV3GetElem( vec, 0 ) );
+    vmathSoaV3SetElem( &result->col1, row, vmathSoaV3GetElem( vec, 1 ) );
+    vmathSoaV3SetElem( &result->col2, row, vmathSoaV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
+{
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaM3GetCol( &tmpV3_0, result, col );
+    vmathSoaV3SetElem( &tmpV3_0, row, val );
+    vmathSoaM3SetCol( result, col, &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row )
+{
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaM3GetCol( &tmpV3_0, mat, col );
+    return vmathSoaV3GetElem( &tmpV3_0, row );
+}
+
+static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col )
+{
+    vmathSoaV3Copy( result, (&mat->col0 + col) );
+}
+
+static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row )
+{
+    vmathSoaV3MakeFromElems( result, vmathSoaV3GetElem( &mat->col0, row ), vmathSoaV3GetElem( &mat->col1, row ), vmathSoaV3GetElem( &mat->col2, row ) );
+}
+
+static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaMatrix3 tmpResult;
+    vmathSoaV3MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x );
+    vmathSoaV3MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y );
+    vmathSoaV3MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z );
+    vmathSoaM3Copy( result, &tmpResult );
+}
+
+static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 tmp0, tmp1, tmp2;
+    vec_float4 detinv;
+    vmathSoaV3Cross( &tmp0, &mat->col1, &mat->col2 );
+    vmathSoaV3Cross( &tmp1, &mat->col2, &mat->col0 );
+    vmathSoaV3Cross( &tmp2, &mat->col0, &mat->col1 );
+    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot( &mat->col2, &tmp2 ) );
+    vmathSoaV3MakeFromElems( &result->col0, vec_madd( tmp0.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV3MakeFromElems( &result->col1, vec_madd( tmp0.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV3MakeFromElems( &result->col2, vec_madd( tmp0.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
+    return vmathSoaV3Dot( &mat->col2, &tmpV3_0 );
+}
+
+static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    vmathSoaV3Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathSoaV3Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV3Add( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+/* Subtracts two 3x3 SoA matrices column by column: result = mat0 - mat1. */
+static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    VmathSoaVector3 *dst[3] = { &result->col0, &result->col1, &result->col2 };
+    const VmathSoaVector3 *lhs[3] = { &mat0->col0, &mat0->col1, &mat0->col2 };
+    const VmathSoaVector3 *rhs[3] = { &mat1->col0, &mat1->col1, &mat1->col2 };
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathSoaV3Sub( dst[i], lhs[i], rhs[i] );
+    }
+}
+
+/* Negates every column of a 3x3 SoA matrix: result = -mat. */
+static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 *dst[3] = { &result->col0, &result->col1, &result->col2 };
+    const VmathSoaVector3 *src[3] = { &mat->col0, &mat->col1, &mat->col2 };
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathSoaV3Neg( dst[i], src[i] );
+    }
+}
+
+/* Applies the per-element absolute value to each column of a 3x3 SoA matrix. */
+static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 *dst[3] = { &result->col0, &result->col1, &result->col2 };
+    const VmathSoaVector3 *src[3] = { &mat->col0, &mat->col1, &mat->col2 };
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathSoaV3AbsPerElem( dst[i], src[i] );
+    }
+}
+
+/* Multiplies every column of a 3x3 SoA matrix by the same scalar vector. */
+static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar )
+{
+    VmathSoaVector3 *dst[3] = { &result->col0, &result->col1, &result->col2 };
+    const VmathSoaVector3 *src[3] = { &mat->col0, &mat->col1, &mat->col2 };
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathSoaV3ScalarMul( dst[i], src[i], scalar );
+    }
+}
+
+/*
+ * Multiplies a 3x3 SoA matrix by a 3-D SoA vector: result = mat * vec.
+ * All three output components are computed into locals before result is
+ * written, so result may be the same object as vec.
+ */
+static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 outX, outY, outZ;
+    outX = vec_add( vec_add( vec_madd( mat->col0.x, vec->x, zero ),
+                             vec_madd( mat->col1.x, vec->y, zero ) ),
+                    vec_madd( mat->col2.x, vec->z, zero ) );
+    outY = vec_add( vec_add( vec_madd( mat->col0.y, vec->x, zero ),
+                             vec_madd( mat->col1.y, vec->y, zero ) ),
+                    vec_madd( mat->col2.y, vec->z, zero ) );
+    outZ = vec_add( vec_add( vec_madd( mat->col0.z, vec->x, zero ),
+                             vec_madd( mat->col1.z, vec->y, zero ) ),
+                    vec_madd( mat->col2.z, vec->z, zero ) );
+    vmathSoaV3MakeFromElems( result, outX, outY, outZ );
+}
+
+/*
+ * Multiplies two 3x3 SoA matrices: result = mat0 * mat1.
+ * The product is assembled in a local matrix and copied out at the end, so
+ * result may be the same object as mat0 or mat1.
+ */
+static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    VmathSoaMatrix3 product;
+    VmathSoaVector3 *dst[3] = { &product.col0, &product.col1, &product.col2 };
+    const VmathSoaVector3 *rhs[3] = { &mat1->col0, &mat1->col1, &mat1->col2 };
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathSoaM3MulV3( dst[i], mat0, rhs[i] );
+    }
+    vmathSoaM3Copy( result, &product );
+}
+
+/* Multiplies two 3x3 SoA matrices element by element (not a matrix product). */
+static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    VmathSoaVector3 *dst[3] = { &result->col0, &result->col1, &result->col2 };
+    const VmathSoaVector3 *lhs[3] = { &mat0->col0, &mat0->col1, &mat0->col2 };
+    const VmathSoaVector3 *rhs[3] = { &mat1->col0, &mat1->col1, &mat1->col2 };
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathSoaV3MulPerElem( dst[i], lhs[i], rhs[i] );
+    }
+}
+
+/* Builds the 3x3 identity: the columns are set to the X, Y and Z axis vectors. */
+static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result )
+{
+    VmathSoaVector3 *xColumn = &result->col0;
+    VmathSoaVector3 *yColumn = &result->col1;
+    VmathSoaVector3 *zColumn = &result->col2;
+    vmathSoaV3MakeXAxis( xColumn );
+    vmathSoaV3MakeYAxis( yColumn );
+    vmathSoaV3MakeZAxis( zColumn );
+}
+
+/*
+ * Builds a 3x3 rotation about the x axis.
+ * Columns: (1,0,0), (0,cos,sin), (0,-sin,cos).
+ */
+static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA, negSinA;
+    sincosf4( radians, &sinA, &cosA );
+    negSinA = negatef4( sinA );
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeFromElems( &result->col1, zero, cosA, sinA );
+    vmathSoaV3MakeFromElems( &result->col2, zero, negSinA, cosA );
+}
+
+/*
+ * Builds a 3x3 rotation about the y axis.
+ * Columns: (cos,0,-sin), (0,1,0), (sin,0,cos).
+ */
+static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA, negSinA;
+    sincosf4( radians, &sinA, &cosA );
+    negSinA = negatef4( sinA );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, zero, negSinA );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeFromElems( &result->col2, sinA, zero, cosA );
+}
+
+/*
+ * Builds a 3x3 rotation about the z axis.
+ * Columns: (cos,sin,0), (-sin,cos,0), (0,0,1).
+ */
+static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA, negSinA;
+    sincosf4( radians, &sinA, &cosA );
+    negSinA = negatef4( sinA );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, sinA, zero );
+    vmathSoaV3MakeFromElems( &result->col1, negSinA, cosA, zero );
+    vmathSoaV3MakeZAxis( &result->col2 );
+}
+
+/*
+ * Builds a 3x3 rotation from the three angles in radiansXYZ (x, y and z
+ * components are the angles about the x, y and z axes).
+ * The sines and cosines of all three angles are read before result is written.
+ */
+static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sX, cX, sY, cY, sZ, cZ, cZsY, sZsY;
+    sincosf4( radiansXYZ->x, &sX, &cX );
+    sincosf4( radiansXYZ->y, &sY, &cY );
+    sincosf4( radiansXYZ->z, &sZ, &cZ );
+    /* products shared by the second and third columns */
+    cZsY = vec_madd( cZ, sY, zero );
+    sZsY = vec_madd( sZ, sY, zero );
+    vmathSoaV3MakeFromElems( &result->col0,
+                             vec_madd( cZ, cY, zero ),
+                             vec_madd( sZ, cY, zero ),
+                             negatef4( sY ) );
+    vmathSoaV3MakeFromElems( &result->col1,
+                             vec_sub( vec_madd( cZsY, sX, zero ), vec_madd( sZ, cX, zero ) ),
+                             vec_add( vec_madd( sZsY, sX, zero ), vec_madd( cZ, cX, zero ) ),
+                             vec_madd( cY, sX, zero ) );
+    vmathSoaV3MakeFromElems( &result->col2,
+                             vec_add( vec_madd( cZsY, cX, zero ), vec_madd( sZ, sX, zero ) ),
+                             vec_sub( vec_madd( sZsY, cX, zero ), vec_madd( cZ, sX, zero ) ),
+                             vec_madd( cY, cX, zero ) );
+}
+
+static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{   /* Builds a 3x3 rotation of `radians` about the axis `unitVec`.
+     * The parameter name implies the axis is unit length; it is not normalized here.
+     * The axis components are copied into locals before result is written.
+     */
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c );
+    x = unitVec->x;
+    y = unitVec->y;
+    z = unitVec->z;
+    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* pairwise axis products reused by the off-diagonal terms */
+    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    vmathSoaV3MakeFromElems( &result->col0, vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ); /* (x*x*(1-c)+c, xy*(1-c)+z*s, zx*(1-c)-y*s) */
+    vmathSoaV3MakeFromElems( &result->col1, vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ); /* (xy*(1-c)-z*s, y*y*(1-c)+c, yz*(1-c)+x*s) */
+    vmathSoaV3MakeFromElems( &result->col2, vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ) ); /* (zx*(1-c)+y*s, yz*(1-c)-x*s, z*z*(1-c)+c) */
+}
+
+static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
+{   /* Builds a 3x3 matrix from a quaternion; simply forwards to vmathSoaM3MakeFromQ. */
+    vmathSoaM3MakeFromQ( result, unitQuat );
+}
+
+/* Builds a 3x3 scale matrix: scaleVec on the diagonal, zeros everywhere else. */
+static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, zero, zero );
+    vmathSoaV3MakeFromElems( &result->col1, zero, scaleVec->y, zero );
+    vmathSoaV3MakeFromElems( &result->col2, zero, zero, scaleVec->z );
+}
+
+/*
+ * Scales each column of mat by the matching component of scaleVec
+ * (col0 by x, col1 by y, col2 by z).
+ */
+static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec )
+{
+    vec_float4 factor;
+    /* each factor is fetched immediately before its column is written */
+    factor = vmathSoaV3GetX( scaleVec );
+    vmathSoaV3ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathSoaV3GetY( scaleVec );
+    vmathSoaV3ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathSoaV3GetZ( scaleVec );
+    vmathSoaV3ScalarMul( &result->col2, &mat->col2, factor );
+}
+
+/* Multiplies every column of mat element-wise by scaleVec, which scales the rows of the matrix. */
+static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 *dst[3] = { &result->col0, &result->col1, &result->col2 };
+    const VmathSoaVector3 *src[3] = { &mat->col0, &mat->col1, &mat->col2 };
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathSoaV3MulPerElem( dst[i], src[i], scaleVec );
+    }
+}
+
+/* Selects between mat0 and mat1 column by column, passing the same select1 mask to vmathSoaV3Select each time. */
+static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 )
+{
+    VmathSoaVector3 *dst[3] = { &result->col0, &result->col1, &result->col2 };
+    const VmathSoaVector3 *first[3] = { &mat0->col0, &mat0->col1, &mat0->col2 };
+    const VmathSoaVector3 *second[3] = { &mat1->col0, &mat1->col1, &mat1->col2 };
+    int i;
+    for ( i = 0; i < 3; i++ ) {
+        vmathSoaV3Select( dst[i], first[i], second[i], select1 );
+    }
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: splits the SoA matrix into four AoS matrices and prints each
+ * one under a "slot N:" heading.
+ */
+static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat )
+{
+    VmathMatrix3 slot0, slot1, slot2, slot3;
+    vmathSoaM3Get4Aos( mat, &slot0, &slot1, &slot2, &slot3 );
+
+    printf("slot 0:\n");
+    vmathM3Print( &slot0 );
+
+    printf("slot 1:\n");
+    vmathM3Print( &slot1 );
+
+    printf("slot 2:\n");
+    vmathM3Print( &slot2 );
+
+    printf("slot 3:\n");
+    vmathM3Print( &slot3 );
+}
+
+static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name )
+{   /* Debug helper: prints "name:" on its own line, then the matrix via vmathSoaM3Print. */
+    printf("%s:\n", name);
+    vmathSoaM3Print( mat );
+}
+
+#endif
+
+/* Copies all four columns of a 4x4 SoA matrix. */
+static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaVector4 *dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathSoaVector4 *src[4] = { &mat->col0, &mat->col1, &mat->col2, &mat->col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathSoaV4Copy( dst[i], src[i] );
+    }
+}
+
+/* Fills all four columns of a 4x4 SoA matrix from the same scalar vector. */
+static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar )
+{
+    VmathSoaVector4 *dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathSoaV4MakeFromScalar( dst[i], scalar );
+    }
+}
+
+/*
+ * Expands a 3x4 transform into a 4x4 matrix.
+ * The first three columns get w = 0 and the fourth column gets w = 1.
+ */
+static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, &mat->col3, one );
+}
+
+/* Builds a 4x4 SoA matrix from four column vectors, copied in order col0..col3. */
+static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0, const VmathSoaVector4 *_col1, const VmathSoaVector4 *_col2, const VmathSoaVector4 *_col3 )
+{
+    VmathSoaVector4 *dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathSoaVector4 *src[4] = { _col0, _col1, _col2, _col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathSoaV4Copy( dst[i], src[i] );
+    }
+}
+
+/*
+ * Builds a 4x4 matrix from a 3x3 matrix and a translation.
+ * The 3x3 columns get w = 0; translateVec becomes the fourth column with w = 1.
+ */
+static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, one );
+}
+
+/*
+ * Builds a 4x4 matrix from a quaternion and a translation.
+ * The quaternion is first converted to a 3x3 matrix with vmathSoaM3MakeFromQ;
+ * its columns get w = 0 and translateVec becomes the fourth column with w = 1.
+ */
+static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    VmathSoaMatrix3 rotation;
+    vmathSoaM3MakeFromQ( &rotation, unitQuat );
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &rotation.col0, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &rotation.col1, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &rotation.col2, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, one );
+}
+
+/* Builds a 4x4 SoA matrix from a single AoS matrix, converting one column at a time. */
+static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4MakeFromAos( out0, &mat->col0 );
+    vmathSoaV4MakeFromAos( out1, &mat->col1 );
+    vmathSoaV4MakeFromAos( out2, &mat->col2 );
+    vmathSoaV4MakeFromAos( out3, &mat->col3 );
+}
+
+/* Packs four AoS 4x4 matrices into one SoA matrix, one column at a time. */
+static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4MakeFrom4Aos( out0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
+    vmathSoaV4MakeFrom4Aos( out1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
+    vmathSoaV4MakeFrom4Aos( out2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
+    vmathSoaV4MakeFrom4Aos( out3, &mat0->col3, &mat1->col3, &mat2->col3, &mat3->col3 );
+}
+
+/* Unpacks one SoA 4x4 matrix into four AoS matrices, one column at a time. */
+static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
+{
+    const VmathSoaVector4 *in0 = &mat->col0;
+    const VmathSoaVector4 *in1 = &mat->col1;
+    const VmathSoaVector4 *in2 = &mat->col2;
+    const VmathSoaVector4 *in3 = &mat->col3;
+    vmathSoaV4Get4Aos( in0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV4Get4Aos( in1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV4Get4Aos( in2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+    vmathSoaV4Get4Aos( in3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
+}
+
+static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0 )
+{   /* Overwrites column 0 of *result with a copy of *_col0. */
+    vmathSoaV4Copy( &result->col0, _col0 );
+}
+
+static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col1 )
+{   /* Overwrites column 1 of *result with a copy of *_col1. */
+    vmathSoaV4Copy( &result->col1, _col1 );
+}
+
+static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col2 )
+{   /* Overwrites column 2 of *result with a copy of *_col2. */
+    vmathSoaV4Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col3 )
+{   /* Overwrites column 3 of *result with a copy of *_col3. */
+    vmathSoaV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec )
+{   /* Overwrites the column selected by `col` with a copy of *vec.
+     * The column is located by pointer offset from col0; `col` is not range-checked.
+     */
+    vmathSoaV4Copy( (&result->col0 + col), vec );
+}
+
+/*
+ * Writes the four elements of vec into row `row` of the matrix:
+ * element i of vec goes into element `row` of column i.
+ */
+static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec )
+{
+    VmathSoaVector4 *column[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        /* each element of vec is fetched immediately before it is stored */
+        vmathSoaV4SetElem( column[i], row, vmathSoaV4GetElem( vec, i ) );
+    }
+}
+
+/*
+ * Sets the element at (col, row) to val.
+ * The column is copied out, modified, and written back.
+ */
+static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
+{
+    VmathSoaVector4 column;
+    vmathSoaM4GetCol( &column, result, col );
+    vmathSoaV4SetElem( &column, row, val );
+    vmathSoaM4SetCol( result, col, &column );
+}
+
+/* Returns the element at (col, row): the column is copied out, then indexed by row. */
+static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row )
+{
+    VmathSoaVector4 column;
+    vec_float4 elem;
+    vmathSoaM4GetCol( &column, mat, col );
+    elem = vmathSoaV4GetElem( &column, row );
+    return elem;
+}
+
+static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copies column 0 of *mat into *result. */
+    vmathSoaV4Copy( result, &mat->col0 );
+}
+
+static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copies column 1 of *mat into *result. */
+    vmathSoaV4Copy( result, &mat->col1 );
+}
+
+static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copies column 2 of *mat into *result. */
+    vmathSoaV4Copy( result, &mat->col2 );
+}
+
+static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copies column 3 of *mat into *result. */
+    vmathSoaV4Copy( result, &mat->col3 );
+}
+
+static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col )
+{   /* Copies the column selected by `col` into *result.
+     * The column is located by pointer offset from col0; `col` is not range-checked.
+     */
+    vmathSoaV4Copy( result, (&mat->col0 + col) );
+}
+
+/* Gathers row `row` of the matrix: element `row` of each column, in column order. */
+static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row )
+{
+    vec_float4 fromCol0, fromCol1, fromCol2, fromCol3;
+    fromCol0 = vmathSoaV4GetElem( &mat->col0, row );
+    fromCol1 = vmathSoaV4GetElem( &mat->col1, row );
+    fromCol2 = vmathSoaV4GetElem( &mat->col2, row );
+    fromCol3 = vmathSoaV4GetElem( &mat->col3, row );
+    vmathSoaV4MakeFromElems( result, fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+/*
+ * Transposes a 4x4 SoA matrix.
+ * The transpose is assembled in a local matrix and copied out at the end, so
+ * result may be the same object as mat.
+ */
+static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaMatrix4 transposed;
+    /* column i of the transpose is row i of mat */
+    vmathSoaV4MakeFromElems( &transposed.col0,
+                             mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
+    vmathSoaV4MakeFromElems( &transposed.col1,
+                             mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
+    vmathSoaV4MakeFromElems( &transposed.col2,
+                             mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
+    vmathSoaV4MakeFromElems( &transposed.col3,
+                             mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
+    vmathSoaM4Copy( result, &transposed );
+}
+
+static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{   /* General inverse of a 4x4 SoA matrix.
+     * All sixteen elements of mat are cached in locals (mA..mP, column by column)
+     * and result is only written in the last four statements, so result may be
+     * the same object as mat.
+     * The determinant is not tested against zero before the division.
+     */
+    VmathSoaVector4 res0, res1, res2, res3;
+    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat->col0.x;
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x;
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x;
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x;
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w; /* next: tmp0..tmp5 = differences of element products taken from the y, z and w rows */
+    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetX( &res0, vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+    vmathSoaV4SetY( &res0, vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+    vmathSoaV4SetZ( &res0, vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+    vmathSoaV4SetW( &res0, vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+    detInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_add( vec_add( vec_add( vec_madd( mA, res0.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, res0.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, res0.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, res0.w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ); /* 1 / det: det is the x row of mat (mA, mE, mI, mM) dotted with res0 */
+    vmathSoaV4SetX( &res1, vec_madd( mI, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* partial terms for res1..res3; completed after tmp0..tmp5 are recomputed below */
+    vmathSoaV4SetY( &res1, vec_madd( mM, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetZ( &res1, vec_madd( mA, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetW( &res1, vec_madd( mE, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetX( &res3, vec_madd( mI, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetY( &res3, vec_madd( mM, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetZ( &res3, vec_madd( mA, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetW( &res3, vec_madd( mE, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetX( &res2, vec_madd( mI, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetY( &res2, vec_madd( mM, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetZ( &res2, vec_madd( mA, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetW( &res2, vec_madd( mE, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp0 = vec_sub( vec_madd( mI, mB, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mJ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* tmp0..tmp5 reused: differences of products that pair the x row with the y, z and w rows */
+    tmp1 = vec_sub( vec_madd( mM, mF, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mN, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp2 = vec_sub( vec_madd( mI, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp3 = vec_sub( vec_madd( mM, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp4 = vec_sub( vec_madd( mI, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp5 = vec_sub( vec_madd( mM, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    vmathSoaV4SetX( &res2, vec_add( vec_sub( vec_madd( mL, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.x ) );
+    vmathSoaV4SetY( &res2, vec_add( vec_sub( vec_madd( mP, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.y ) );
+    vmathSoaV4SetZ( &res2, vec_sub( vec_sub( vec_madd( mB, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mD, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.z ) );
+    vmathSoaV4SetW( &res2, vec_sub( vec_sub( vec_madd( mF, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mH, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.w ) );
+    vmathSoaV4SetX( &res3, vec_add( vec_sub( vec_madd( mJ, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mK, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.x ) );
+    vmathSoaV4SetY( &res3, vec_add( vec_sub( vec_madd( mN, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mO, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.y ) );
+    vmathSoaV4SetZ( &res3, vec_sub( vec_sub( vec_madd( mC, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.z ) );
+    vmathSoaV4SetW( &res3, vec_sub( vec_sub( vec_madd( mG, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.w ) );
+    vmathSoaV4SetX( &res1, vec_sub( vec_sub( vec_madd( mK, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.x ) );
+    vmathSoaV4SetY( &res1, vec_sub( vec_sub( vec_madd( mO, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.y ) );
+    vmathSoaV4SetZ( &res1, vec_add( vec_sub( vec_madd( mD, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.z ) );
+    vmathSoaV4SetW( &res1, vec_add( vec_sub( vec_madd( mH, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.w ) );
+    vmathSoaV4ScalarMul( &result->col0, &res0, detInv ); /* scale the four accumulated columns by 1/det */
+    vmathSoaV4ScalarMul( &result->col1, &res1, detInv );
+    vmathSoaV4ScalarMul( &result->col2, &res2, detInv );
+    vmathSoaV4ScalarMul( &result->col3, &res3, detInv );
+}
+
+/*
+ * Inverts a 4x4 matrix by treating it as a 3x4 transform.
+ * Only the xyz part of each column is used (the w row of mat is ignored);
+ * the transform is inverted with vmathSoaT3Inverse and expanded back to 4x4.
+ */
+static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaTransform3 affine, inverted;
+    VmathSoaVector3 xyz;
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col0 );
+    vmathSoaT3SetCol0( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col1 );
+    vmathSoaT3SetCol1( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col2 );
+    vmathSoaT3SetCol2( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col3 );
+    vmathSoaT3SetCol3( &affine, &xyz );
+
+    vmathSoaT3Inverse( &inverted, &affine );
+    vmathSoaM4MakeFromT3( result, &inverted );
+}
+
+/*
+ * Inverts a 4x4 matrix by treating it as a 3x4 transform.
+ * Only the xyz part of each column is used (the w row of mat is ignored);
+ * the transform is inverted with vmathSoaT3OrthoInverse and expanded back to 4x4.
+ */
+static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaTransform3 affine, inverted;
+    VmathSoaVector3 xyz;
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col0 );
+    vmathSoaT3SetCol0( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col1 );
+    vmathSoaT3SetCol1( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col2 );
+    vmathSoaT3SetCol2( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col3 );
+    vmathSoaT3SetCol3( &affine, &xyz );
+
+    vmathSoaT3OrthoInverse( &inverted, &affine );
+    vmathSoaM4MakeFromT3( result, &inverted );
+}
+
+static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat )
+{   /* Returns the determinant of a 4x4 SoA matrix.
+     * The sixteen elements are cached in mA..mP (column by column); dx..dw are
+     * built from the y, z and w rows and then dotted with the x row (mA, mE, mI, mM).
+     */
+    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat->col0.x;
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x;
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x;
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x;
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w; /* next: tmp0..tmp5 = differences of element products taken from the y, z and w rows */
+    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    dx = vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    dy = vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    dz = vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    dw = vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return vec_add( vec_add( vec_add( vec_madd( mA, dx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, dy, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, dz, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, dw, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* mA*dx + mE*dy + mI*dz + mM*dw */
+}
+
+/* Adds two 4x4 SoA matrices column by column: result = mat0 + mat1. */
+static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    VmathSoaVector4 *dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathSoaVector4 *lhs[4] = { &mat0->col0, &mat0->col1, &mat0->col2, &mat0->col3 };
+    const VmathSoaVector4 *rhs[4] = { &mat1->col0, &mat1->col1, &mat1->col2, &mat1->col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathSoaV4Add( dst[i], lhs[i], rhs[i] );
+    }
+}
+
+/* Subtracts two 4x4 SoA matrices column by column: result = mat0 - mat1. */
+static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    VmathSoaVector4 *dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathSoaVector4 *lhs[4] = { &mat0->col0, &mat0->col1, &mat0->col2, &mat0->col3 };
+    const VmathSoaVector4 *rhs[4] = { &mat1->col0, &mat1->col1, &mat1->col2, &mat1->col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathSoaV4Sub( dst[i], lhs[i], rhs[i] );
+    }
+}
+
+/* Negates every column of a 4x4 SoA matrix: result = -mat. */
+static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaVector4 *dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathSoaVector4 *src[4] = { &mat->col0, &mat->col1, &mat->col2, &mat->col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathSoaV4Neg( dst[i], src[i] );
+    }
+}
+
+/* Applies the per-element absolute value to each column of a 4x4 SoA matrix. */
+static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaVector4 *dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathSoaVector4 *src[4] = { &mat->col0, &mat->col1, &mat->col2, &mat->col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathSoaV4AbsPerElem( dst[i], src[i] );
+    }
+}
+
+/* Multiplies every column of a 4x4 SoA matrix by the same scalar vector. */
+static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar )
+{
+    VmathSoaVector4 *dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathSoaVector4 *src[4] = { &mat->col0, &mat->col1, &mat->col2, &mat->col3 };
+    int i;
+    for ( i = 0; i < 4; i++ ) {
+        vmathSoaV4ScalarMul( dst[i], src[i], scalar );
+    }
+}
+
+/*
+ * Multiplies a 4x4 SoA matrix by a 4-D SoA vector: result = mat * vec.
+ * All four output components are computed into locals before result is
+ * written, so result may be the same object as vec.
+ */
+static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 outX, outY, outZ, outW;
+    outX = vec_add( vec_add( vec_add( vec_madd( mat->col0.x, vec->x, zero ),
+                                      vec_madd( mat->col1.x, vec->y, zero ) ),
+                             vec_madd( mat->col2.x, vec->z, zero ) ),
+                    vec_madd( mat->col3.x, vec->w, zero ) );
+    outY = vec_add( vec_add( vec_add( vec_madd( mat->col0.y, vec->x, zero ),
+                                      vec_madd( mat->col1.y, vec->y, zero ) ),
+                             vec_madd( mat->col2.y, vec->z, zero ) ),
+                    vec_madd( mat->col3.y, vec->w, zero ) );
+    outZ = vec_add( vec_add( vec_add( vec_madd( mat->col0.z, vec->x, zero ),
+                                      vec_madd( mat->col1.z, vec->y, zero ) ),
+                             vec_madd( mat->col2.z, vec->z, zero ) ),
+                    vec_madd( mat->col3.z, vec->w, zero ) );
+    outW = vec_add( vec_add( vec_add( vec_madd( mat->col0.w, vec->x, zero ),
+                                      vec_madd( mat->col1.w, vec->y, zero ) ),
+                             vec_madd( mat->col2.w, vec->z, zero ) ),
+                    vec_madd( mat->col3.w, vec->w, zero ) );
+    vmathSoaV4MakeFromElems( result, outX, outY, outZ, outW );
+}
+
+/*
+ * Multiplies a 4x4 SoA matrix by a 3-D SoA vector; the fourth column of mat
+ * does not contribute (equivalent to a vector with w = 0).
+ * Each component of result is stored as soon as it is computed, in the order x, y, z, w.
+ */
+static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    result->x = vec_add( vec_add( vec_madd( mat->col0.x, vec->x, zero ),
+                                  vec_madd( mat->col1.x, vec->y, zero ) ),
+                         vec_madd( mat->col2.x, vec->z, zero ) );
+    result->y = vec_add( vec_add( vec_madd( mat->col0.y, vec->x, zero ),
+                                  vec_madd( mat->col1.y, vec->y, zero ) ),
+                         vec_madd( mat->col2.y, vec->z, zero ) );
+    result->z = vec_add( vec_add( vec_madd( mat->col0.z, vec->x, zero ),
+                                  vec_madd( mat->col1.z, vec->y, zero ) ),
+                         vec_madd( mat->col2.z, vec->z, zero ) );
+    result->w = vec_add( vec_add( vec_madd( mat->col0.w, vec->x, zero ),
+                                  vec_madd( mat->col1.w, vec->y, zero ) ),
+                         vec_madd( mat->col2.w, vec->z, zero ) );
+}
+
+static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt )
+{   /* Multiply a 4x4 matrix by a 3-component point: same weighted sum of col0..col2 as the vector case, plus mat->col3 added unscaled (i.e. the point is treated as having an implicit fourth component of 1). */
+    result->x = vec_add( vec_add( vec_add( vec_madd( mat->col0.x, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.x );
+    result->y = vec_add( vec_add( vec_add( vec_madd( mat->col0.y, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.y );
+    result->z = vec_add( vec_add( vec_add( vec_madd( mat->col0.z, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.z );
+    result->w = vec_add( vec_add( vec_add( vec_madd( mat->col0.w, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.w, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.w, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.w );
+}
+
+static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{   /* Matrix product mat0 * mat1: each output column is mat0 applied to the matching column of mat1. The product is staged in a local and copied out last, so both inputs are fully read before *result is written. */
+    VmathSoaMatrix4 product;
+    vmathSoaM4MulV4( &product.col3, mat0, &mat1->col3 );
+    vmathSoaM4MulV4( &product.col2, mat0, &mat1->col2 );
+    vmathSoaM4MulV4( &product.col1, mat0, &mat1->col1 );
+    vmathSoaM4MulV4( &product.col0, mat0, &mat1->col0 );
+    vmathSoaM4Copy( result, &product ); /* single write-back of the finished product */
+}
+
+static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm1 )
+{   /* Product of a 4x4 matrix and a 3x4 transform: the transform's first three columns are multiplied as vectors, its fourth column as a point (so mat->col3 is added). Staged in a local and copied to *result at the end. */
+    VmathSoaMatrix4 product;
+    VmathSoaPoint3 translationPnt;
+    vmathSoaP3MakeFromV3( &translationPnt, &tfrm1->col3 ); /* reinterpret the translation column as a point */
+    vmathSoaM4MulP3( &product.col3, mat, &translationPnt );
+    vmathSoaM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathSoaM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathSoaM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathSoaM4Copy( result, &product );
+}
+
+static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{   /* Column-by-column per-element multiply: result->colN = vmathSoaV4MulPerElem( mat0->colN, mat1->colN ). This is not a matrix product. */
+    vmathSoaV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathSoaV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathSoaV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
+}
+
+static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result )
+{   /* Fill *result with the identity: column N becomes the Nth axis vector (x, y, z, w). */
+    vmathSoaV4MakeWAxis( &result->col3 );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeXAxis( &result->col0 );
+}
+
+static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 )
+{   /* Store the three columns of *mat3 into the xyz parts of result->col0..col2 via vmathSoaV4SetXYZ. col3 is not touched here; the w components are presumably left as they were (confirm against vmathSoaV4SetXYZ). */
+    vmathSoaV4SetXYZ( &result->col0, &mat3->col0 );
+    vmathSoaV4SetXYZ( &result->col1, &mat3->col1 );
+    vmathSoaV4SetXYZ( &result->col2, &mat3->col2 );
+}
+
+static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat )
+{   /* Extract the xyz parts of mat->col0..col2 into the three columns of *result; mat->col3 is not read. */
+    vmathSoaV4GetXYZ( &result->col0, &mat->col0 );
+    vmathSoaV4GetXYZ( &result->col1, &mat->col1 );
+    vmathSoaV4GetXYZ( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
+{   /* Store *translateVec into the xyz part of result->col3 via vmathSoaV4SetXYZ; columns 0..2 are not touched. */
+    vmathSoaV4SetXYZ( &result->col3, translateVec );
+}
+
+static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat )
+{   /* Read the xyz part of mat->col3 into *result. */
+    vmathSoaV4GetXYZ( result, &mat->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians )
+{   /* Build a 4x4 rotation about the x axis. With s, c from sincosf4( radians ): col0 = x axis, col1 = (0, c, s, 0), col2 = (0, -s, c, 0), col3 = w axis. */
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeWAxis( &result->col3 );
+    vmathSoaV4MakeFromElems( &result->col2, zero, negatef4( sinA ), cosA, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, cosA, sinA, zero );
+    vmathSoaV4MakeXAxis( &result->col0 );
+}
+
+static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians )
+{   /* Build a 4x4 rotation about the y axis. With s, c from sincosf4( radians ): col0 = (c, 0, -s, 0), col1 = y axis, col2 = (s, 0, c, 0), col3 = w axis. */
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeWAxis( &result->col3 );
+    vmathSoaV4MakeFromElems( &result->col2, sinA, zero, cosA, zero );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeFromElems( &result->col0, cosA, zero, negatef4( sinA ), zero );
+}
+
+static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians )
+{   /* Build a 4x4 rotation about the z axis. With s, c from sincosf4( radians ): col0 = (c, s, 0, 0), col1 = (-s, c, 0, 0), col2 = z axis, col3 = w axis. */
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeWAxis( &result->col3 );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeFromElems( &result->col1, negatef4( sinA ), cosA, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col0, cosA, sinA, zero, zero );
+}
+
+static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ )
+{   /* Build a 4x4 rotation from three angles (radiansXYZ->x, ->y, ->z about the x, y, z axes). The sines and cosines of each angle are combined into the three rotation columns; every w is 0 and col3 is the w axis. */
+    vec_float4 sinX, cosX, sinY, cosY, sinZ, cosZ, cosZsinY, sinZsinY, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radiansXYZ->x, &sinX, &cosX );
+    sincosf4( radiansXYZ->y, &sinY, &cosY );
+    sincosf4( radiansXYZ->z, &sinZ, &cosZ );
+    cosZsinY = vec_madd( cosZ, sinY, zero ); /* shared by col1.x and col2.x */
+    sinZsinY = vec_madd( sinZ, sinY, zero ); /* shared by col1.y and col2.y */
+    vmathSoaV4MakeFromElems( &result->col0, vec_madd( cosZ, cosY, zero ), vec_madd( sinZ, cosY, zero ), negatef4( sinY ), zero );
+    vmathSoaV4MakeFromElems( &result->col1, vec_sub( vec_madd( cosZsinY, sinX, zero ), vec_madd( sinZ, cosX, zero ) ), vec_add( vec_madd( sinZsinY, sinX, zero ), vec_madd( cosZ, cosX, zero ) ), vec_madd( cosY, sinX, zero ), zero );
+    vmathSoaV4MakeFromElems( &result->col2, vec_add( vec_madd( cosZsinY, cosX, zero ), vec_madd( sinZ, sinX, zero ) ), vec_sub( vec_madd( sinZsinY, cosX, zero ), vec_madd( cosZ, sinX, zero ) ), vec_madd( cosY, cosX, zero ), zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{   /* Build a 4x4 rotation of `radians` about the axis *unitVec (the parameter name indicates it is expected to be unit length; it is not normalized here). Diagonal terms are u*u*(1-c)+c; off-diagonal terms are u*v*(1-c) +/- w*s. col3 is the w axis. */
+    vec_float4 ux, uy, uz, sinA, cosA, oneMinusCos, uxy, uyz, uzx, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );
+    ux = unitVec->x;
+    uy = unitVec->y;
+    uz = unitVec->z;
+    uxy = vec_madd( ux, uy, zero );
+    uyz = vec_madd( uy, uz, zero );
+    uzx = vec_madd( uz, ux, zero );
+    oneMinusCos = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), cosA );
+    vmathSoaV4MakeFromElems( &result->col0, vec_add( vec_madd( vec_madd( ux, ux, zero ), oneMinusCos, zero ), cosA ), vec_add( vec_madd( uxy, oneMinusCos, zero ), vec_madd( uz, sinA, zero ) ), vec_sub( vec_madd( uzx, oneMinusCos, zero ), vec_madd( uy, sinA, zero ) ), zero );
+    vmathSoaV4MakeFromElems( &result->col1, vec_sub( vec_madd( uxy, oneMinusCos, zero ), vec_madd( uz, sinA, zero ) ), vec_add( vec_madd( vec_madd( uy, uy, zero ), oneMinusCos, zero ), cosA ), vec_add( vec_madd( uyz, oneMinusCos, zero ), vec_madd( ux, sinA, zero ) ), zero );
+    vmathSoaV4MakeFromElems( &result->col2, vec_add( vec_madd( uzx, oneMinusCos, zero ), vec_madd( uy, sinA, zero ) ), vec_sub( vec_madd( uyz, oneMinusCos, zero ), vec_madd( ux, sinA, zero ) ), vec_add( vec_madd( vec_madd( uz, uz, zero ), oneMinusCos, zero ), cosA ), zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat )
+{   /* Build a 4x4 matrix from a quaternion by first building the 3x4 transform (vmathSoaT3MakeRotationQ) and then widening it with vmathSoaM4MakeFromT3. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationQ( &rotation, unitQuat );
+    vmathSoaM4MakeFromT3( result, &rotation );
+}
+
+static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec )
+{   /* Build a scale matrix: scaleVec->x, ->y, ->z go on the diagonal of columns 0..2, every other element of those columns is 0, and col3 is the w axis. */
+    vmathSoaV4MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec )
+{   /* Scale the columns of *mat: col0, col1, col2 are multiplied by the x, y, z of *scaleVec respectively, and col3 is copied unchanged. */
+    vmathSoaV4ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
+    vmathSoaV4ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV4ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
+    vmathSoaV4Copy( &result->col3, &mat->col3 );
+}
+
+static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat )
+{   /* Scale the rows of *mat: every column (including col3) is multiplied per element by a 4-vector built from *scaleVec and a fourth component of 1. */
+    VmathSoaVector4 rowWeights;
+    vmathSoaV4MakeFromV3Scalar( &rowWeights, scaleVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); /* fourth weight 1.0 */
+    vmathSoaV4MulPerElem( &result->col0, &mat->col0, &rowWeights );
+    vmathSoaV4MulPerElem( &result->col1, &mat->col1, &rowWeights );
+    vmathSoaV4MulPerElem( &result->col2, &mat->col2, &rowWeights );
+    vmathSoaV4MulPerElem( &result->col3, &mat->col3, &rowWeights );
+}
+
+static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
+{   /* Build a translation matrix: columns 0..2 are the x, y, z axes and col3 is built from *translateVec with a fourth component of 1. */
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeXAxis( &result->col0 );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); /* kept last, as in the original ordering */
+}
+
+static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec )
+{   /* Build a view matrix: construct the eye's frame (three basis vectors as columns 0..2, eyePos as column 3) and return its inverse via vmathSoaM4OrthoInverse. */
+    VmathSoaMatrix4 eyeFrame;
+    VmathSoaVector3 axisX, axisY, axisZ, eyeMinusTarget, upCrossZ;
+    VmathSoaVector4 frameCol0, frameCol1, frameCol2, frameCol3;
+    vmathSoaV3Normalize( &axisY, upVec );                   /* provisional up, recomputed below */
+    vmathSoaP3Sub( &eyeMinusTarget, eyePos, lookAtPos );
+    vmathSoaV3Normalize( &axisZ, &eyeMinusTarget );         /* z axis is the normalized (eye - lookAt) */
+    vmathSoaV3Cross( &upCrossZ, &axisY, &axisZ );
+    vmathSoaV3Normalize( &axisX, &upCrossZ );
+    vmathSoaV3Cross( &axisY, &axisZ, &axisX );              /* final y axis = z cross x */
+    vmathSoaV4MakeFromV3( &frameCol0, &axisX );
+    vmathSoaV4MakeFromV3( &frameCol1, &axisY );
+    vmathSoaV4MakeFromV3( &frameCol2, &axisZ );
+    vmathSoaV4MakeFromP3( &frameCol3, eyePos );
+    vmathSoaM4MakeFromCols( &eyeFrame, &frameCol0, &frameCol1, &frameCol2, &frameCol3 );
+    vmathSoaM4OrthoInverse( result, &eyeFrame );
+}
+
+static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{   /* Build a perspective projection. focal = tanf4( PI_OVER_2 - fovyRadians/2 ); depthInv = 1/(zNear - zFar). col0 = (focal/aspect,0,0,0), col1 = (0,focal,0,0), col2 = (0,0,(zNear+zFar)*depthInv,-1), col3 = (0,0,zNear*zFar*depthInv*2,0). */
+    vec_float4 focal, depthInv, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}), one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    focal = tanf4( vec_sub( ((vec_float4){_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2}), vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), fovyRadians, zero ) ) );
+    depthInv = divf4( one, vec_sub( zNear, zFar ) );
+    vmathSoaV4MakeFromElems( &result->col0, divf4( focal, aspect ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, focal, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, vec_madd( vec_add( zNear, zFar ), depthInv, zero ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) );
+    vmathSoaV4MakeFromElems( &result->col3, zero, zero, vec_madd( vec_madd( vec_madd( zNear, zFar, zero ), depthInv, zero ), ((vec_float4){2.0f,2.0f,2.0f,2.0f}), zero ), zero );
+}
+
+static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   /* Build a perspective projection from frustum planes. Uses the sums (right+left, top+bottom, zNear+zFar), the reciprocals of (right-left, top-bottom, zNear-zFar) and 2*zNear. No check is made for degenerate (zero-width) ranges. */
+    vec_float4 sumRL, sumTB, sumNF, invRL, invTB, invNF, twoNear, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}), one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    sumRL = vec_add( right, left );
+    sumTB = vec_add( top, bottom );
+    sumNF = vec_add( zNear, zFar );
+    invRL = divf4( one, vec_sub( right, left ) );
+    invTB = divf4( one, vec_sub( top, bottom ) );
+    invNF = divf4( one, vec_sub( zNear, zFar ) );
+    twoNear = vec_add( zNear, zNear );
+    vmathSoaV4MakeFromElems( &result->col0, vec_madd( twoNear, invRL, zero ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, vec_madd( twoNear, invTB, zero ), zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, vec_madd( sumRL, invRL, zero ), vec_madd( sumTB, invTB, zero ), vec_madd( sumNF, invNF, zero ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) );
+    vmathSoaV4MakeFromElems( &result->col3, zero, zero, vec_madd( vec_madd( twoNear, invNF, zero ), zFar, zero ), zero );
+}
+
+static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   /* Build an orthographic projection. Diagonal = 2/(right-left), 2/(top-bottom), 2/(zNear-zFar); col3 = (-(right+left)/(right-left), -(top+bottom)/(top-bottom), (zNear+zFar)/(zNear-zFar), 1). No check is made for degenerate ranges. */
+    vec_float4 sumRL, sumTB, sumNF, invRL, invTB, invNF, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}), one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    sumRL = vec_add( right, left );
+    sumTB = vec_add( top, bottom );
+    sumNF = vec_add( zNear, zFar );
+    invRL = divf4( one, vec_sub( right, left ) );
+    invTB = divf4( one, vec_sub( top, bottom ) );
+    invNF = divf4( one, vec_sub( zNear, zFar ) );
+    vmathSoaV4MakeFromElems( &result->col0, vec_add( invRL, invRL ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, vec_add( invTB, invTB ), zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, vec_add( invNF, invNF ), zero );
+    vmathSoaV4MakeFromElems( &result->col3, vec_madd( negatef4( sumRL ), invRL, zero ), vec_madd( negatef4( sumTB ), invTB, zero ), vec_madd( sumNF, invNF, zero ), one );
+}
+
+static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 )
+{   /* Column-by-column select between *mat0 and *mat1 using the same mask for all four columns. Presumably mat1 is taken where select1 is set; confirm against vmathSoaV4Select. */
+    vmathSoaV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
+    vmathSoaV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
+    vmathSoaV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
+    vmathSoaV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat )
+{   /* Debug helper (compiled only under _VECTORMATH_DEBUG): unpack the SoA matrix into four AoS matrices and print each one under a "slot N:" heading. */
+    VmathMatrix4 slot0, slot1, slot2, slot3;
+    vmathSoaM4Get4Aos( mat, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathM4Print( &slot0 );
+    printf("slot 1:\n");
+    vmathM4Print( &slot1 );
+    printf("slot 2:\n");
+    vmathM4Print( &slot2 );
+    printf("slot 3:\n");
+    vmathM4Print( &slot3 );
+}
+
+static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name )
+{   /* Debug helper: print "<name>:" on its own line, then the matrix via vmathSoaM4Print. name must be a valid C string (it is passed to printf's %s). */
+    printf("%s:\n", name);
+    vmathSoaM4Print( mat );
+}
+
+#endif
+
+static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copy all four columns of *tfrm into *result. */
+    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
+    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
+    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
+    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar )
+{   /* Initialise every column of *result from the same scalar via vmathSoaV3MakeFromScalar. */
+    vmathSoaV3MakeFromScalar( &result->col3, scalar );
+    vmathSoaV3MakeFromScalar( &result->col2, scalar );
+    vmathSoaV3MakeFromScalar( &result->col1, scalar );
+    vmathSoaV3MakeFromScalar( &result->col0, scalar );
+}
+
+static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2, const VmathSoaVector3 *_col3 )
+{   /* Assemble a transform from four column vectors: *_colN is copied into result->colN. */
+    vmathSoaV3Copy( &result->col0, _col0 );
+    vmathSoaV3Copy( &result->col1, _col1 );
+    vmathSoaV3Copy( &result->col2, _col2 );
+    vmathSoaV3Copy( &result->col3, _col3 );
+}
+
+static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec )
+{   /* Assemble a transform from a 3x3 matrix (columns 0..2) and a translation vector (column 3). */
+    vmathSoaT3SetUpper3x3( result, tfrm );
+    vmathSoaT3SetTranslation( result, translateVec );
+}
+
+static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
+{   /* Assemble a transform from a quaternion and a translation: the quaternion is converted to a 3x3 matrix for columns 0..2, and *translateVec becomes column 3. */
+    VmathSoaMatrix3 rotation;
+    vmathSoaM3MakeFromQ( &rotation, unitQuat );
+    vmathSoaT3SetUpper3x3( result, &rotation );
+    vmathSoaT3SetTranslation( result, translateVec );
+}
+
+static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm )
+{   /* Convert a single AoS transform to SoA form, column by column. Presumably the same value is replicated into every slot; confirm against vmathSoaV3MakeFromAos. */
+    vmathSoaV3MakeFromAos( &result->col0, &tfrm->col0 );
+    vmathSoaV3MakeFromAos( &result->col1, &tfrm->col1 );
+    vmathSoaV3MakeFromAos( &result->col2, &tfrm->col2 );
+    vmathSoaV3MakeFromAos( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 )
+{   /* Pack four AoS transforms into one SoA transform, column by column. Presumably tfrmN lands in slot N; confirm against vmathSoaV3MakeFrom4Aos. */
+    vmathSoaV3MakeFrom4Aos( &result->col0, &tfrm0->col0, &tfrm1->col0, &tfrm2->col0, &tfrm3->col0 );
+    vmathSoaV3MakeFrom4Aos( &result->col1, &tfrm0->col1, &tfrm1->col1, &tfrm2->col1, &tfrm3->col1 );
+    vmathSoaV3MakeFrom4Aos( &result->col2, &tfrm0->col2, &tfrm1->col2, &tfrm2->col2, &tfrm3->col2 );
+    vmathSoaV3MakeFrom4Aos( &result->col3, &tfrm0->col3, &tfrm1->col3, &tfrm2->col3, &tfrm3->col3 );
+}
+
+static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
+{   /* Unpack one SoA transform into four AoS transforms, column by column (the inverse of the 4-AoS packing). All four result pointers are written. */
+    vmathSoaV3Get4Aos( &tfrm->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV3Get4Aos( &tfrm->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV3Get4Aos( &tfrm->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+    vmathSoaV3Get4Aos( &tfrm->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
+}
+
+static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0 )
+{   /* Overwrite column 0 of *result with a copy of *_col0; other columns are untouched. */
+    vmathSoaV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *_col1 )
+{   /* Overwrite column 1 of *result with a copy of *_col1; other columns are untouched. */
+    vmathSoaV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *_col2 )
+{   /* Overwrite column 2 of *result with a copy of *_col2; other columns are untouched. */
+    vmathSoaV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *_col3 )
+{   /* Overwrite column 3 (the translation column) of *result with a copy of *_col3; other columns are untouched. */
+    vmathSoaV3Copy( &result->col3, _col3 );
+}
+
+static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec )
+{   /* Overwrite column `col` of *result with *vec. The column is located by indexing from col0, so col must be 0..3; it is not range-checked. */
+    vmathSoaV3Copy( &(&result->col0)[col], vec );
+}
+
+static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec )
+{   /* Overwrite row `row` of *result: element `row` of column N is set to element N of *vec, for N = 0..3. */
+    vmathSoaV3SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
+    vmathSoaV3SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
+    vmathSoaV3SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
+    vmathSoaV3SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
+}
+
+static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
+{   /* Set the element at (col, row) by read-modify-write of the whole column: fetch the column, change one element, store the column back. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, result, col );
+    vmathSoaV3SetElem( &column, row, val );
+    vmathSoaT3SetCol( result, col, &column );
+}
+
+static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row )
+{   /* Return the element at (col, row): copy out the column, then read element `row` from the copy. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, tfrm, col );
+    return vmathSoaV3GetElem( &column, row );
+}
+
+static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copy column 0 of *tfrm into *result. */
+    vmathSoaV3Copy( result, &tfrm->col0 );
+}
+
+static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copy column 1 of *tfrm into *result. */
+    vmathSoaV3Copy( result, &tfrm->col1 );
+}
+
+static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copy column 2 of *tfrm into *result. */
+    vmathSoaV3Copy( result, &tfrm->col2 );
+}
+
+static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copy column 3 (the translation column) of *tfrm into *result. */
+    vmathSoaV3Copy( result, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col )
+{   /* Copy column `col` of *tfrm into *result. The column is located by indexing from col0, so col must be 0..3; it is not range-checked. */
+    vmathSoaV3Copy( result, &(&tfrm->col0)[col] );
+}
+
+static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row )
+{   /* Gather row `row`: element `row` of columns 0..3 becomes the x, y, z, w of *result. */
+    vmathSoaV4MakeFromElems( result, vmathSoaV3GetElem( &tfrm->col0, row ), vmathSoaV3GetElem( &tfrm->col1, row ), vmathSoaV3GetElem( &tfrm->col2, row ), vmathSoaV3GetElem( &tfrm->col3, row ) );
+}
+
+static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{   /* General inverse of a 3x4 transform. The 3x3 part is inverted from the pairwise cross products of its columns, scaled by 1/determinant (determinant = col2 . (col0 x col1)); the new translation is -(inverse3x3 * col3). A zero determinant is not checked. */
+    VmathSoaVector3 cross12, cross20, cross01, invCol0, invCol1, invCol2, partX, partY, partZ, partYZ, partXYZ, negTranslation;
+    vec_float4 invDet, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}), one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    vmathSoaV3Cross( &cross12, &tfrm->col1, &tfrm->col2 );
+    vmathSoaV3Cross( &cross20, &tfrm->col2, &tfrm->col0 );
+    vmathSoaV3Cross( &cross01, &tfrm->col0, &tfrm->col1 );
+    invDet = divf4( one, vmathSoaV3Dot( &tfrm->col2, &cross01 ) );
+    vmathSoaV3MakeFromElems( &invCol0, vec_madd( cross12.x, invDet, zero ), vec_madd( cross20.x, invDet, zero ), vec_madd( cross01.x, invDet, zero ) );
+    vmathSoaV3MakeFromElems( &invCol1, vec_madd( cross12.y, invDet, zero ), vec_madd( cross20.y, invDet, zero ), vec_madd( cross01.y, invDet, zero ) );
+    vmathSoaV3MakeFromElems( &invCol2, vec_madd( cross12.z, invDet, zero ), vec_madd( cross20.z, invDet, zero ), vec_madd( cross01.z, invDet, zero ) );
+    vmathSoaV3Copy( &result->col0, &invCol0 );
+    vmathSoaV3Copy( &result->col1, &invCol1 );
+    vmathSoaV3Copy( &result->col2, &invCol2 );
+    vmathSoaV3ScalarMul( &partX, &invCol0, tfrm->col3.x ); /* tfrm->col3 is read only after columns 0..2 of *result are written */
+    vmathSoaV3ScalarMul( &partY, &invCol1, tfrm->col3.y );
+    vmathSoaV3ScalarMul( &partZ, &invCol2, tfrm->col3.z );
+    vmathSoaV3Add( &partYZ, &partY, &partZ );
+    vmathSoaV3Add( &partXYZ, &partX, &partYZ );
+    vmathSoaV3Neg( &negTranslation, &partXYZ );
+    vmathSoaV3Copy( &result->col3, &negTranslation );
+}
+
+static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Inverse that uses the transpose of the 3x3 part in place of a true inverse (so it is only the real inverse when that part is orthonormal, as the name suggests); the new translation is -(transpose3x3 * col3). */
+    VmathSoaVector3 invCol0, invCol1, invCol2, partX, partY, partZ, partYZ, partXYZ, negTranslation;
+    vmathSoaV3MakeFromElems( &invCol0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
+    vmathSoaV3MakeFromElems( &invCol1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
+    vmathSoaV3MakeFromElems( &invCol2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
+    vmathSoaV3Copy( &result->col0, &invCol0 );
+    vmathSoaV3Copy( &result->col1, &invCol1 );
+    vmathSoaV3Copy( &result->col2, &invCol2 );
+    vmathSoaV3ScalarMul( &partX, &invCol0, tfrm->col3.x ); /* tfrm->col3 is read only after columns 0..2 of *result are written */
+    vmathSoaV3ScalarMul( &partY, &invCol1, tfrm->col3.y );
+    vmathSoaV3ScalarMul( &partZ, &invCol2, tfrm->col3.z );
+    vmathSoaV3Add( &partYZ, &partY, &partZ );
+    vmathSoaV3Add( &partXYZ, &partX, &partYZ );
+    vmathSoaV3Neg( &negTranslation, &partXYZ );
+    vmathSoaV3Copy( &result->col3, &negTranslation );
+}
+
+static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Apply vmathSoaV3AbsPerElem to each of the four columns of *tfrm, writing the results to the matching columns of *result. */
+    vmathSoaV3AbsPerElem( &result->col0, &tfrm->col0 );
+    vmathSoaV3AbsPerElem( &result->col1, &tfrm->col1 );
+    vmathSoaV3AbsPerElem( &result->col2, &tfrm->col2 );
+    vmathSoaV3AbsPerElem( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec )
+{   /* Apply the 3x3 part of a transform to a vector: col0*vec->x + col1*vec->y + col2*vec->z per row; col3 (translation) is not used. All three components are computed into locals before *result is written. */
+    vec_float4 outX, outY, outZ, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    outX = vec_add( vec_add( vec_madd( tfrm->col0.x, vec->x, zero ), vec_madd( tfrm->col1.x, vec->y, zero ) ), vec_madd( tfrm->col2.x, vec->z, zero ) );
+    outY = vec_add( vec_add( vec_madd( tfrm->col0.y, vec->x, zero ), vec_madd( tfrm->col1.y, vec->y, zero ) ), vec_madd( tfrm->col2.y, vec->z, zero ) );
+    outZ = vec_add( vec_add( vec_madd( tfrm->col0.z, vec->x, zero ), vec_madd( tfrm->col1.z, vec->y, zero ) ), vec_madd( tfrm->col2.z, vec->z, zero ) );
+    vmathSoaV3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt )
+{   /* Apply a transform to a point: the 3x3 part times the point, plus the translation column col3. All three components are computed into locals before *result is written. */
+    vec_float4 outX, outY, outZ, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    outX = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.x, pnt->x, zero ), vec_madd( tfrm->col1.x, pnt->y, zero ) ), vec_madd( tfrm->col2.x, pnt->z, zero ) ), tfrm->col3.x );
+    outY = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.y, pnt->x, zero ), vec_madd( tfrm->col1.y, pnt->y, zero ) ), vec_madd( tfrm->col2.y, pnt->z, zero ) ), tfrm->col3.y );
+    outZ = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.z, pnt->x, zero ), vec_madd( tfrm->col1.z, pnt->y, zero ) ), vec_madd( tfrm->col2.z, pnt->z, zero ) ), tfrm->col3.z );
+    vmathSoaP3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
+{   /* Compose two transforms (tfrm0 * tfrm1): columns 0..2 of tfrm1 are transformed as vectors, its translation column as a point. The product is staged in a local and copied to *result last. */
+    VmathSoaTransform3 product;
+    VmathSoaPoint3 innerOrigin, movedOrigin;
+    vmathSoaT3MulV3( &product.col0, tfrm0, &tfrm1->col0 );
+    vmathSoaT3MulV3( &product.col1, tfrm0, &tfrm1->col1 );
+    vmathSoaT3MulV3( &product.col2, tfrm0, &tfrm1->col2 );
+    vmathSoaP3MakeFromV3( &innerOrigin, &tfrm1->col3 );   /* translation of tfrm1, as a point */
+    vmathSoaT3MulP3( &movedOrigin, tfrm0, &innerOrigin );
+    vmathSoaV3MakeFromP3( &product.col3, &movedOrigin );
+    vmathSoaT3Copy( result, &product );
+}
+
+static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
+{   /* Column-by-column per-element multiply: result->colN = vmathSoaV3MulPerElem( tfrm0->colN, tfrm1->colN ). This is not a transform composition. */
+    vmathSoaV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
+    vmathSoaV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
+    vmathSoaV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
+    vmathSoaV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
+}
+
+static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result )
+{   /* Fill *result with the identity transform: columns 0..2 are the x, y, z axes and the translation column is all zeros. */
+    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeXAxis( &result->col0 );
+}
+
+static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm )
+{   /* Copy the three columns of the 3x3 matrix *tfrm into columns 0..2 of *result; col3 (translation) is not touched. */
+    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
+    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
+    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
+}
+
+static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Build a 3x3 matrix from columns 0..2 of *tfrm; the translation column is not read. */
+    vmathSoaM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
+}
+
+static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
+{   /* Overwrite the translation column (col3) of *result with *translateVec; columns 0..2 are not touched. */
+    vmathSoaV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copy the translation column (col3) of *tfrm into *result. */
+    vmathSoaV3Copy( result, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians )
+{   /* Build a rotation about the x axis with zero translation. With s, c from sincosf4( radians ): col0 = x axis, col1 = (0, c, s), col2 = (0, -s, c), col3 = 0. */
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+    vmathSoaV3MakeFromElems( &result->col2, zero, negatef4( sinA ), cosA );
+    vmathSoaV3MakeFromElems( &result->col1, zero, cosA, sinA );
+    vmathSoaV3MakeXAxis( &result->col0 );
+}
+
+static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians )
+{   /* Build a rotation about the y axis with zero translation. With s, c from sincosf4( radians ): col0 = (c, 0, -s), col1 = y axis, col2 = (s, 0, c), col3 = 0. */
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+    vmathSoaV3MakeFromElems( &result->col2, sinA, zero, cosA );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, zero, negatef4( sinA ) );
+}
+
+static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians )
+{   /* Build a rotation about the z axis with zero translation. With s, c from sincosf4( radians ): col0 = (c, s, 0), col1 = (-s, c, 0), col2 = z axis, col3 = 0. */
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( sinA ), cosA, zero );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, sinA, zero );
+}
+
+static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ )
+{   /* Build a rotation from three angles (radiansXYZ->x, ->y, ->z about the x, y, z axes) with zero translation. The sines and cosines of each angle are combined into the three rotation columns. */
+    vec_float4 sinX, cosX, sinY, cosY, sinZ, cosZ, cosZsinY, sinZsinY, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radiansXYZ->x, &sinX, &cosX );
+    sincosf4( radiansXYZ->y, &sinY, &cosY );
+    sincosf4( radiansXYZ->z, &sinZ, &cosZ );
+    cosZsinY = vec_madd( cosZ, sinY, zero ); /* shared by col1.x and col2.x */
+    sinZsinY = vec_madd( sinZ, sinY, zero ); /* shared by col1.y and col2.y */
+    vmathSoaV3MakeFromElems( &result->col0, vec_madd( cosZ, cosY, zero ), vec_madd( sinZ, cosY, zero ), negatef4( sinY ) );
+    vmathSoaV3MakeFromElems( &result->col1, vec_sub( vec_madd( cosZsinY, sinX, zero ), vec_madd( sinZ, cosX, zero ) ), vec_add( vec_madd( sinZsinY, sinX, zero ), vec_madd( cosZ, cosX, zero ) ), vec_madd( cosY, sinX, zero ) );
+    vmathSoaV3MakeFromElems( &result->col2, vec_add( vec_madd( cosZsinY, cosX, zero ), vec_madd( sinZ, sinX, zero ) ), vec_sub( vec_madd( sinZsinY, cosX, zero ), vec_madd( cosZ, sinX, zero ) ), vec_madd( cosY, cosX, zero ) );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+/* Transform made of the 3x3 produced by vmathSoaM3MakeRotationAxis( radians, unitVec ) plus an all-zero translation column. */
+static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    VmathSoaVector3 noTranslation;
+    VmathSoaMatrix3 rotation;
+    vmathSoaV3MakeFromScalar( &noTranslation, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaM3MakeRotationAxis( &rotation, radians, unitVec );
+    vmathSoaT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+/* Transform made of the 3x3 produced by vmathSoaM3MakeFromQ( unitQuat ) plus an all-zero translation column. */
+static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat )
+{
+    VmathSoaVector3 noTranslation;
+    VmathSoaMatrix3 rotation;
+    vmathSoaV3MakeFromScalar( &noTranslation, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaM3MakeFromQ( &rotation, unitQuat );
+    vmathSoaT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+/* Scale transform: scaleVec->x, ->y, ->z go on the diagonal of col0..col2, every other element (including all of col3) is zero. */
+static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec )
+{
+    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z );
+    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* no translation */
+}
+/* Column-wise scale (the product tfrm * scale): col0/col1/col2 of *tfrm are multiplied by the X/Y/Z of *scaleVec; col3 is copied as is. */
+static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec )
+{
+    vmathSoaV3ScalarMul( &result->col0, &tfrm->col0, vmathSoaV3GetX( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col1, &tfrm->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col2, &tfrm->col2, vmathSoaV3GetZ( scaleVec ) );
+    vmathSoaV3Copy( &result->col3, &tfrm->col3 ); /* translation is not scaled */
+}
+/* Row-wise scale (the product scale * tfrm): every column of *tfrm, translation included, is multiplied per element by *scaleVec. */
+static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm )
+{
+    vmathSoaV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
+    vmathSoaV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
+    vmathSoaV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
+    vmathSoaV3MulPerElem( &result->col3, &tfrm->col3, scaleVec ); /* unlike AppendScale, col3 is scaled too */
+}
+/* Pure translation: col0..col2 are filled by vmathSoaV3MakeXAxis/YAxis/ZAxis and col3 receives a copy of *translateVec. */
+static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
+{
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3Copy( &result->col3, translateVec );
+}
+/* Column-by-column vmathSoaV3Select of tfrm0/tfrm1 under one mask; presumably set lanes of select1 pick tfrm1 - confirm in vmathSoaV3Select. */
+static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 )
+{
+    vmathSoaV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
+    vmathSoaV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
+    vmathSoaV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
+    vmathSoaV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+/* Debug helper (built only with _VECTORMATH_DEBUG): unpacks *tfrm into four AoS transforms and prints each under a "slot N:" heading. */
+static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm )
+{
+    VmathTransform3 mat0, mat1, mat2, mat3;
+    vmathSoaT3Get4Aos( tfrm, &mat0, &mat1, &mat2, &mat3 ); /* one AoS transform per slot */
+    printf("slot 0:\n");
+    vmathT3Print( &mat0 );
+    printf("slot 1:\n");
+    vmathT3Print( &mat1 );
+    printf("slot 2:\n");
+    vmathT3Print( &mat2 );
+    printf("slot 3:\n");
+    vmathT3Print( &mat3 );
+}
+/* Debug helper: prints name followed by ":" and a newline, then the same per-slot dump as vmathSoaT3Print. */
+static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name )
+{
+    printf("%s:\n", name);
+    vmathSoaT3Print( tfrm );
+}
+
+#endif
+/* Branch-free 3x3 matrix to quaternion conversion for all lanes at once; presumably *tfrm must be a pure rotation - confirm with callers. */
+static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *tfrm )
+{
+    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
+    vec_uint4 largestXorY, largestYorZ, largestZorX;
+    /* Local copies of the nine elements, named <row><column>: e.g. yx is the y component of col0. */
+    xx = tfrm->col0.x;
+    yx = tfrm->col0.y;
+    zx = tfrm->col0.z;
+    xy = tfrm->col1.x;
+    yy = tfrm->col1.y;
+    zy = tfrm->col1.z;
+    xz = tfrm->col2.x;
+    yz = tfrm->col2.y;
+    zz = tfrm->col2.z;
+
+    trace = vec_add( vec_add( xx, yy ), zz );
+    /* Lane masks: the three "largest" masks can only be set in lanes where the trace is negative. */
+    negTrace = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), trace );
+    ZgtX = (vec_uint4)vec_cmpgt( zz, xx );
+    ZgtY = (vec_uint4)vec_cmpgt( zz, yy );
+    YgtX = (vec_uint4)vec_cmpgt( yy, xx );
+    largestXorY = vec_andc( negTrace, vec_and( ZgtX, ZgtY ) ); /* trace < 0 and zz does not exceed both xx and yy */
+    largestYorZ = vec_and( negTrace, vec_or( YgtX, ZgtX ) ); /* trace < 0 and yy or zz exceeds xx */
+    largestZorX = vec_andc( negTrace, vec_andc( YgtX, ZgtY ) ); /* trace < 0 and not ( yy > xx while zz <= yy ) */
+    /* Flip signs of selected elements so the single radicand/difference formulas below serve every case. */
+    zz = vec_sel( zz, negatef4(zz), largestXorY );
+    xy = vec_sel( xy, negatef4(xy), largestXorY );
+    xx = vec_sel( xx, negatef4(xx), largestYorZ );
+    yz = vec_sel( yz, negatef4(yz), largestYorZ );
+    yy = vec_sel( yy, negatef4(yy), largestZorX );
+    zx = vec_sel( zx, negatef4(zx), largestZorX );
+    /* radicand = 1 + sum of the (possibly sign-flipped) diagonal; scale = 0.5 / sqrt( radicand ). */
+    radicand = vec_add( vec_add( vec_add( xx, yy ), zz ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+    scale = vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( radicand ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+
+    tmpx = vec_madd( vec_sub( zy, yz ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    tmpy = vec_madd( vec_sub( xz, zx ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    tmpz = vec_madd( vec_sub( yx, xy ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    tmpw = vec_madd( radicand, scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* radicand * scale = 0.5 * sqrt( radicand ) */
+    qx = tmpx;
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+    /* Where largestXorY is set, reverse the component order: x<->w and y<->z. */
+    qx = vec_sel( qx, tmpw, largestXorY );
+    qy = vec_sel( qy, tmpz, largestXorY );
+    qz = vec_sel( qz, tmpy, largestXorY );
+    qw = vec_sel( qw, tmpx, largestXorY );
+    tmpx = qx; /* keep the pre-swap x and z: the next four selects overwrite them */
+    tmpz = qz;
+    qx = vec_sel( qx, qy, largestYorZ ); /* where largestYorZ is set, swap x<->y and z<->w */
+    qy = vec_sel( qy, tmpx, largestYorZ );
+    qz = vec_sel( qz, qw, largestYorZ );
+    qw = vec_sel( qw, tmpz, largestYorZ );
+
+    result->x = qx;
+    result->y = qy;
+    result->z = qz;
+    result->w = qw;
+}
+/* Outer product tfrm0 * transpose(tfrm1): column j of *result is *tfrm0 multiplied by component j (X, Y, Z) of *tfrm1. */
+static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *tfrm0, const VmathSoaVector3 *tfrm1 )
+{
+    vmathSoaV3ScalarMul( &result->col0, tfrm0, vmathSoaV3GetX( tfrm1 ) );
+    vmathSoaV3ScalarMul( &result->col1, tfrm0, vmathSoaV3GetY( tfrm1 ) );
+    vmathSoaV3ScalarMul( &result->col2, tfrm0, vmathSoaV3GetZ( tfrm1 ) );
+}
+/* Outer product tfrm0 * transpose(tfrm1): column j of *result is *tfrm0 multiplied by component j (X, Y, Z, W) of *tfrm1. */
+static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *tfrm0, const VmathSoaVector4 *tfrm1 )
+{
+    vmathSoaV4ScalarMul( &result->col0, tfrm0, vmathSoaV4GetX( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col1, tfrm0, vmathSoaV4GetY( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col2, tfrm0, vmathSoaV4GetZ( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col3, tfrm0, vmathSoaV4GetW( tfrm1 ) );
+}
+/* Row vector times matrix: component i of *result is vec.x*col_i.x + vec.y*col_i.y + vec.z*col_i.z, i.e. the dot of *vec with column i. */
+static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
+{
+    vec_float4 dotCol0, dotCol1, dotCol2, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    dotCol2 = vec_add( vec_add( vec_madd( vec->x, mat->col2.x, zero ), vec_madd( vec->y, mat->col2.y, zero ) ), vec_madd( vec->z, mat->col2.z, zero ) );
+    dotCol1 = vec_add( vec_add( vec_madd( vec->x, mat->col1.x, zero ), vec_madd( vec->y, mat->col1.y, zero ) ), vec_madd( vec->z, mat->col1.z, zero ) );
+    dotCol0 = vec_add( vec_add( vec_madd( vec->x, mat->col0.x, zero ), vec_madd( vec->y, mat->col0.y, zero ) ), vec_madd( vec->z, mat->col0.z, zero ) );
+    vmathSoaV3MakeFromElems( result, dotCol0, dotCol1, dotCol2 ); /* stored last, after every input has been read */
+}
+/* Skew-symmetric matrix of *vec: columns (0, z, -y), (-z, 0, x), (y, -x, 0), so that result * v equals cross( vec, v ). */
+static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3MakeFromElems( &result->col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec->z, negatef4( vec->y ) );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( vec->z ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec->x );
+    vmathSoaV3MakeFromElems( &result->col2, vec->y, negatef4( vec->x ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+/* Builds *result column by column as vmathSoaV3Cross( vec, column of mat ); temporaries are used so *result is written only at the end. */
+static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 crossCol0, crossCol1, crossCol2;
+    vmathSoaV3Cross( &crossCol2, vec, &mat->col2 );
+    vmathSoaV3Cross( &crossCol1, vec, &mat->col1 );
+    vmathSoaV3Cross( &crossCol0, vec, &mat->col0 );
+    vmathSoaM3MakeFromCols( result, &crossCol0, &crossCol1, &crossCol2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa_v.h
index 0b16a9553..c8401e3d0 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa_v.h
@@ -1,1063 +1,1063 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_V_C_H
-#define _VECTORMATH_MAT_SOA_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromAos(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
-    return result;
-}
-
-static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
-{
-    vmathSoaM3Get4Aos(&mat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col0 )
-{
-    vmathSoaM3SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col1 )
-{
-    vmathSoaM3SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col2 )
-{
-    vmathSoaM3SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec )
-{
-    vmathSoaM3SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec )
-{
-    vmathSoaM3SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaM3SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row )
-{
-    return vmathSoaM3GetElem(&mat, col, row);
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat )
-{
-    return vmathSoaM3Determinant(&mat);
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat )
-{
-    vmathSoaM3Print(&mat);
-}
-
-static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name )
-{
-    vmathSoaM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 _col0, VmathSoaVector4 _col1, VmathSoaVector4 _col2, VmathSoaVector4 _col3 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromAos(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
-    return result;
-}
-
-static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
-{
-    vmathSoaM4Get4Aos(&mat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col0 )
-{
-    vmathSoaM4SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col1 )
-{
-    vmathSoaM4SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col2 )
-{
-    vmathSoaM4SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col3 )
-{
-    vmathSoaM4SetCol3(result, &_col3);
-}
-
-static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec )
-{
-    vmathSoaM4SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec )
-{
-    vmathSoaM4SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaM4SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row )
-{
-    return vmathSoaM4GetElem(&mat, col, row);
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat )
-{
-    return vmathSoaM4Determinant(&mat);
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 )
-{
-    vmathSoaM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec )
-{
-    vmathSoaM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat )
-{
-    vmathSoaM4Print(&mat);
-}
-
-static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name )
-{
-    vmathSoaM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2, VmathSoaVector3 _col3 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromAos(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFrom4Aos(&result, &tfrm0, &tfrm1, &tfrm2, &tfrm3);
-    return result;
-}
-
-static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
-{
-    vmathSoaT3Get4Aos(&tfrm, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 _col0 )
-{
-    vmathSoaT3SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 _col1 )
-{
-    vmathSoaT3SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 _col2 )
-{
-    vmathSoaT3SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 _col3 )
-{
-    vmathSoaT3SetCol3(result, &_col3);
-}
-
-static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec )
-{
-    vmathSoaT3SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec )
-{
-    vmathSoaT3SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaT3SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row )
-{
-    return vmathSoaT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row )
-{
-    VmathSoaVector4 result;
-    vmathSoaT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 tfrm )
-{
-    vmathSoaT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec )
-{
-    vmathSoaT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm )
-{
-    vmathSoaT3Print(&tfrm);
-}
-
-static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name )
-{
-    vmathSoaT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 tfrm )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 tfrm0, VmathSoaVector3 tfrm1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 tfrm0, VmathSoaVector4 tfrm1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_V_C_H
+#define _VECTORMATH_MAT_SOA_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromAos(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
+    return result;
+}
+
+static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
+{
+    vmathSoaM3Get4Aos(&mat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col0 )
+{
+    vmathSoaM3SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col1 )
+{
+    vmathSoaM3SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col2 )
+{
+    vmathSoaM3SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec )
+{
+    vmathSoaM3SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec )
+{
+    vmathSoaM3SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
+{
+    vmathSoaM3SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row )
+{
+    return vmathSoaM3GetElem(&mat, col, row);
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat )
+{
+    return vmathSoaM3Determinant(&mat);
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeIdentity(&result);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat )
+{
+    vmathSoaM3Print(&mat);
+}
+
+static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name )
+{
+    vmathSoaM3Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromT3(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 _col0, VmathSoaVector4 _col1, VmathSoaVector4 _col2, VmathSoaVector4 _col3 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromAos(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
+    return result;
+}
+
+static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
+{
+    vmathSoaM4Get4Aos(&mat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col0 )
+{
+    vmathSoaM4SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col1 )
+{
+    vmathSoaM4SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col2 )
+{
+    vmathSoaM4SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col3 )
+{
+    vmathSoaM4SetCol3(result, &_col3);
+}
+
+static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec )
+{
+    vmathSoaM4SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec )
+{
+    vmathSoaM4SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
+{
+    vmathSoaM4SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row )
+{
+    return vmathSoaM4GetElem(&mat, col, row);
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat )
+{
+    return vmathSoaM4Determinant(&mat);
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulP3(&result, &mat, &pnt);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MulT3(&result, &mat, &tfrm1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 )
+{
+    vmathSoaM4SetUpper3x3(result, &mat3);
+}
+
+static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec )
+{
+    vmathSoaM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat )
+{
+    vmathSoaM4Print(&mat);
+}
+
+static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name )
+{
+    vmathSoaM4Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2, VmathSoaVector3 _col3 )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromM3V3(&result, &tfrm, &translateVec);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm )
+{   /* By-value form of vmathSoaT3MakeFromAos: passes the address of the tfrm copy, returns the SoA output. */
+    VmathSoaTransform3 soa;
+    vmathSoaT3MakeFromAos( &soa, &tfrm );
+    return soa;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 )
+{   /* By-value form of vmathSoaT3MakeFrom4Aos: passes the addresses of the four AoS copies, returns the SoA output. */
+    VmathSoaTransform3 soa;
+    vmathSoaT3MakeFrom4Aos( &soa, &tfrm0, &tfrm1, &tfrm2, &tfrm3 );
+    return soa;
+}
+
+static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
+{   /* By-value form: passes the address of the tfrm copy and the four output pointers to vmathSoaT3Get4Aos. */
+    vmathSoaT3Get4Aos(&tfrm, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 _col0 )
+{   /* By-value form: forwards result and the address of the _col0 copy to vmathSoaT3SetCol0. */
+    vmathSoaT3SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 _col1 )
+{   /* By-value form: forwards result and the address of the _col1 copy to vmathSoaT3SetCol1. */
+    vmathSoaT3SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 _col2 )
+{   /* By-value form: forwards result and the address of the _col2 copy to vmathSoaT3SetCol2. */
+    vmathSoaT3SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 _col3 )
+{   /* By-value form: forwards result and the address of the _col3 copy to vmathSoaT3SetCol3. */
+    vmathSoaT3SetCol3(result, &_col3);
+}
+
+static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec )
+{   /* By-value form: forwards result, col (not range-checked here) and the address of the vec copy to vmathSoaT3SetCol. */
+    vmathSoaT3SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec )
+{   /* By-value form: forwards result, row (not range-checked here) and the address of the vec copy to vmathSoaT3SetRow. */
+    vmathSoaT3SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
+{   /* Forwards all arguments unchanged to vmathSoaT3SetElem; col and row are not range-checked here. */
+    vmathSoaT3SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row )
+{   /* By-value form: returns vmathSoaT3GetElem applied to the tfrm copy; col and row are not range-checked here. */
+    return vmathSoaT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol0: reads from the tfrm copy into a local vector and returns it. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol0( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol1: reads from the tfrm copy into a local vector and returns it. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol1( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol2: reads from the tfrm copy into a local vector and returns it. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol2( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol3: reads from the tfrm copy into a local vector and returns it. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol3( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col )
+{   /* By-value form of vmathSoaT3GetCol: col is passed through unchecked; the fetched column is returned by value. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, &tfrm, col );
+    return column;
+}
+
+static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row )
+{   /* By-value form of vmathSoaT3GetRow: row is passed through unchecked; the fetched row is returned by value. */
+    VmathSoaVector4 rowVec;
+    vmathSoaT3GetRow( &rowVec, &tfrm, row );
+    return rowVec;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3Inverse: applies it to the tfrm copy and returns the output. */
+    VmathSoaTransform3 inverse;
+    vmathSoaT3Inverse( &inverse, &tfrm );
+    return inverse;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3OrthoInverse: applies it to the tfrm copy and returns the output. */
+    VmathSoaTransform3 inverse;
+    vmathSoaT3OrthoInverse( &inverse, &tfrm );
+    return inverse;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3AbsPerElem: applies it to the tfrm copy and returns the output. */
+    VmathSoaTransform3 absolute;
+    vmathSoaT3AbsPerElem( &absolute, &tfrm );
+    return absolute;
+}
+
+static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec )
+{   /* By-value form of vmathSoaT3MulV3: passes the addresses of the tfrm and vec copies, returns the output vector. */
+    VmathSoaVector3 product;
+    vmathSoaT3MulV3( &product, &tfrm, &vec );
+    return product;
+}
+
+static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt )
+{   /* By-value form of vmathSoaT3MulP3: passes the addresses of the tfrm and pnt copies, returns the output point. */
+    VmathSoaPoint3 product;
+    vmathSoaT3MulP3( &product, &tfrm, &pnt );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
+{   /* By-value form of vmathSoaT3Mul: passes the addresses of the tfrm0 and tfrm1 copies (in that order), returns the output. */
+    VmathSoaTransform3 product;
+    vmathSoaT3Mul( &product, &tfrm0, &tfrm1 );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
+{   /* By-value form of vmathSoaT3MulPerElem: passes the addresses of the tfrm0 and tfrm1 copies, returns the output. */
+    VmathSoaTransform3 product;
+    vmathSoaT3MulPerElem( &product, &tfrm0, &tfrm1 );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( void )
+{   /* By-value form of vmathSoaT3MakeIdentity. "( void )" gives this C definition a prototype, so calls that pass arguments are diagnosed. */
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 tfrm )
+{   /* By-value form: forwards result and the address of the tfrm (3x3 matrix) copy to vmathSoaT3SetUpper3x3. */
+    vmathSoaT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetUpper3x3: reads from the tfrm copy into a local matrix and returns it. */
+    VmathSoaMatrix3 upper;
+    vmathSoaT3GetUpper3x3( &upper, &tfrm );
+    return upper;
+}
+
+static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec )
+{   /* By-value form: forwards result and the address of the translateVec copy to vmathSoaT3SetTranslation. */
+    vmathSoaT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetTranslation: reads from the tfrm copy into a local vector and returns it. */
+    VmathSoaVector3 translation;
+    vmathSoaT3GetTranslation( &translation, &tfrm );
+    return translation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationX: lets it fill a local transform from radians, then returns it. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationX( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationY: lets it fill a local transform from radians, then returns it. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationY( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationZ: lets it fill a local transform from radians, then returns it. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationZ( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{   /* By-value form of vmathSoaT3MakeRotationZYX: passes the address of the radiansXYZ copy, returns the output. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationZYX( &rotation, &radiansXYZ );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{   /* By-value form of vmathSoaT3MakeRotationAxis: passes radians and the address of the unitVec copy, returns the output. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationAxis( &rotation, radians, &unitVec );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat )
+{   /* By-value form of vmathSoaT3MakeRotationQ: passes the address of the unitQuat copy, returns the output. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationQ( &rotation, &unitQuat );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec )
+{   /* By-value form of vmathSoaT3MakeScale: passes the address of the scaleVec copy, returns the output. */
+    VmathSoaTransform3 scaling;
+    vmathSoaT3MakeScale( &scaling, &scaleVec );
+    return scaling;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec )
+{   /* By-value form of vmathSoaT3AppendScale: passes the addresses of the tfrm and scaleVec copies, returns the output. */
+    VmathSoaTransform3 scaled;
+    vmathSoaT3AppendScale( &scaled, &tfrm, &scaleVec );
+    return scaled;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3PrependScale: note the argument order is (scaleVec, tfrm), the reverse of AppendScale. */
+    VmathSoaTransform3 scaled;
+    vmathSoaT3PrependScale( &scaled, &scaleVec, &tfrm );
+    return scaled;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec )
+{   /* By-value form of vmathSoaT3MakeTranslation: passes the address of the translateVec copy, returns the output. */
+    VmathSoaTransform3 translation;
+    vmathSoaT3MakeTranslation( &translation, &translateVec );
+    return translation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 )
+{   /* By-value form of vmathSoaT3Select: passes the addresses of the tfrm0/tfrm1 copies plus select1, returns the output. */
+    VmathSoaTransform3 chosen;
+    vmathSoaT3Select( &chosen, &tfrm0, &tfrm1, select1 );
+    return chosen;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm )
+{   /* By-value form (only compiled under _VECTORMATH_DEBUG): passes the address of the tfrm copy to vmathSoaT3Print. */
+    vmathSoaT3Print(&tfrm);
+}
+
+static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name )
+{   /* By-value form (only compiled under _VECTORMATH_DEBUG): passes the address of the tfrm copy and name to vmathSoaT3Prints. */
+    vmathSoaT3Prints(&tfrm, name);
+}
+
+#endif
+
+static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 tfrm )
+{   /* By-value form of vmathSoaQMakeFromM3: passes the address of the tfrm copy, returns the output quaternion. */
+    VmathSoaQuat quat;
+    vmathSoaQMakeFromM3( &quat, &tfrm );
+    return quat;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 tfrm0, VmathSoaVector3 tfrm1 )
+{   /* By-value form of vmathSoaV3Outer: despite their names, tfrm0 and tfrm1 are vectors; their copies' addresses are passed on. */
+    VmathSoaMatrix3 outer;
+    vmathSoaV3Outer( &outer, &tfrm0, &tfrm1 );
+    return outer;
+}
+
+static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 tfrm0, VmathSoaVector4 tfrm1 )
+{   /* By-value form of vmathSoaV4Outer: despite their names, tfrm0 and tfrm1 are vectors; their copies' addresses are passed on. */
+    VmathSoaMatrix4 outer;
+    vmathSoaV4Outer( &outer, &tfrm0, &tfrm1 );
+    return outer;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
+{   /* By-value form of vmathSoaV3RowMul: passes the addresses of the vec and mat copies, returns the output vector. */
+    VmathSoaVector3 product;
+    vmathSoaV3RowMul( &product, &vec, &mat );
+    return product;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec )
+{   /* By-value form of vmathSoaV3CrossMatrix: passes the address of the vec copy, returns the output matrix. */
+    VmathSoaMatrix3 crossMat;
+    vmathSoaV3CrossMatrix( &crossMat, &vec );
+    return crossMat;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
+{   /* By-value form of vmathSoaV3CrossMatrixMul: passes the addresses of the vec and mat copies, returns the output matrix. */
+    VmathSoaMatrix3 product;
+    vmathSoaV3CrossMatrixMul( &product, &vec, &mat );
+    return product;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos.h
index a9e2ab729..2915f4f07 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos.h
@@ -1,379 +1,379 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_C_H
-#define _VECTORMATH_QUAT_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
-        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
-        result->vec128 = (vec_float4){_x, _y, _z, _w};
-    } else {
-        float *pf = (float *)&result->vec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-        pf[3] = _w;
-    }
-}
-
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
-{
-    result->vec128 = xyz->vec128;
-    _vmathVfSetElement(result->vec128, _w, 3);
-}
-
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
-{
-    result->vec128 = _vmathVfSplatScalar(scalar);
-}
-
-static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathQMakeIdentity( VmathQuat *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0001;
-}
-
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    VmathQuat tmpQ_0, tmpQ_1;
-    vmathQSub( &tmpQ_0, quat1, quat0 );
-    vmathQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
-{
-    VmathQuat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitQuat0->vec128, unitQuat1->vec128 );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start.vec128 = vec_sel( unitQuat0->vec128, negatef4( unitQuat0->vec128 ), selectMask );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = _vmathVfSplatScalar(t);
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    result->vec128 = vec_madd( start.vec128, scale0, vec_madd( unitQuat1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
-{
-    VmathQuat tmp0, tmp1;
-    vmathQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &tmp0, &tmp1 );
-}
-
-static inline vec_float4 vmathQGet128( const VmathQuat *quat )
-{
-    return quat->vec128;
-}
-
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec_sel( vec->vec128, result->vec128, _VECTORMATH_MASK_0x000F );
-}
-
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathQSetX( VmathQuat *result, float _x )
-{
-    _vmathVfSetElement(result->vec128, _x, 0);
-}
-
-static inline float vmathQGetX( const VmathQuat *quat )
-{
-    return _vmathVfGetElement(quat->vec128, 0);
-}
-
-static inline void vmathQSetY( VmathQuat *result, float _y )
-{
-    _vmathVfSetElement(result->vec128, _y, 1);
-}
-
-static inline float vmathQGetY( const VmathQuat *quat )
-{
-    return _vmathVfGetElement(quat->vec128, 1);
-}
-
-static inline void vmathQSetZ( VmathQuat *result, float _z )
-{
-    _vmathVfSetElement(result->vec128, _z, 2);
-}
-
-static inline float vmathQGetZ( const VmathQuat *quat )
-{
-    return _vmathVfGetElement(quat->vec128, 2);
-}
-
-static inline void vmathQSetW( VmathQuat *result, float _w )
-{
-    _vmathVfSetElement(result->vec128, _w, 3);
-}
-
-static inline float vmathQGetW( const VmathQuat *quat )
-{
-    return _vmathVfGetElement(quat->vec128, 3);
-}
-
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
-{
-    _vmathVfSetElement(result->vec128, value, idx);
-}
-
-static inline float vmathQGetElem( const VmathQuat *quat, int idx )
-{
-    return _vmathVfGetElement(quat->vec128, idx);
-}
-
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->vec128 = vec_add( quat0->vec128, quat1->vec128 );
-}
-
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->vec128 = vec_sub( quat0->vec128, quat1->vec128 );
-}
-
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->vec128 = vec_madd( quat->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->vec128 = divf4( quat->vec128, _vmathVfSplatScalar(scalar) );
-}
-
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = negatef4( quat->vec128 );
-}
-
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    vec_float4 result = _vmathVfDot4( quat0->vec128, quat1->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathQNorm( const VmathQuat *quat )
-{
-    vec_float4 result = _vmathVfDot4( quat->vec128, quat->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathQLength( const VmathQuat *quat )
-{
-    return sqrtf( vmathQNorm( quat ) );
-}
-
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
-{
-    vec_float4 dot = _vmathVfDot4( quat->vec128, quat->vec128 );
-    result->vec128 = vec_madd( quat->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    VmathVector3 crossVec, tmpV3_0;
-    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = vec_splat( cosAngle, 0 );
-    cosAngleX2Plus2 = vec_madd( cosAngle, ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){2.0f,2.0f,2.0f,2.0f}) );
-    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
-    cosHalfAngleX2 = vec_madd( recipCosHalfAngleX2, cosAngleX2Plus2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    crossVec = tmpV3_0;
-    res = vec_madd( crossVec.vec128, recipCosHalfAngleX2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_sel( res, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( vec_madd( unitVec->vec128, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c, _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0xF000 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x0F00 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x00F0 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = quat0->vec128;
-    rdata = quat1->vec128;
-    tmp0 = vec_perm( ldata, ldata, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( rdata, rdata, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( ldata, ldata, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( rdata, rdata, _VECTORMATH_PERM_YZXW );
-    qv = vec_madd( vec_splat( ldata, 3 ), rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_madd( ldata, rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    l_wxyz = vec_sld( ldata, ldata, 12 );
-    r_wxyz = vec_sld( rdata, rdata, 12 );
-    qw = vec_nmsub( l_wxyz, r_wxyz, product );
-    xy = vec_madd( l_wxyz, r_wxyz, product );
-    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );
-    result->vec128 = vec_sel( qv, qw, _VECTORMATH_MASK_0x000F );
-}
-
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
-{
-    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat->vec128;
-    vdata = vec->vec128;
-    tmp0 = vec_perm( qdata, qdata, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( vdata, vdata, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( qdata, qdata, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( vdata, vdata, _VECTORMATH_PERM_YZXW );
-    wwww = vec_splat( qdata, 3 );
-    qv = vec_madd( wwww, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_madd( qdata, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
-    qw = vec_add( vec_sld( product, product, 8 ), qw );
-    tmp1 = vec_perm( qv, qv, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( qv, qv, _VECTORMATH_PERM_YZXW );
-    res = vec_madd( vec_splat( qw, 0 ), qdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( wwww, qv, res );
-    res = vec_madd( tmp0, tmp1, res );
-    res = vec_nmsub( tmp2, tmp3, res );
-    result->vec128 = res;
-}
-
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = vec_xor( quat->vec128, ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) );
-}
-
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
-{
-    unsigned int tmp;
-    tmp = (unsigned int)-(select1 > 0);
-    result->vec128 = vec_sel( quat0->vec128, quat1->vec128, _vmathVuiSplatScalar(tmp) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint( const VmathQuat *quat )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat->vec128;
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-static inline void vmathQPrints( const VmathQuat *quat, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat->vec128;
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_C_H
+#define _VECTORMATH_QUAT_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
+{   /* Copies quat's vec128 member into result. */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
+{   /* Stores (_x, _y, _z, _w) into result->vec128, choosing the store strategy by whether the inputs are compile-time constants. */
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
+        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {   /* true only when the compiler reports all four as constants */
+        result->vec128 = (vec_float4){_x, _y, _z, _w};   /* build the vector from a literal */
+    } else {
+        float *pf = (float *)&result->vec128;   /* otherwise view the vector as four floats and store each one */
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+        pf[3] = _w;
+    }
+}
+
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
+{   /* Copies xyz's whole vec128 into result, then sets element 3 to _w. */
+    result->vec128 = xyz->vec128;
+    _vmathVfSetElement(result->vec128, _w, 3);
+}
+
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
+{   /* Copies vec's vec128 member into result unchanged. */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
+{   /* Sets result->vec128 to _vmathVfSplatScalar(scalar) (presumably scalar in every element; confirm against the helper). */
+    result->vec128 = _vmathVfSplatScalar(scalar);
+}
+
+static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 )
+{   /* Stores the raw vector vf4 as result's vec128 member. */
+    result->vec128 = vf4;
+}
+
+static inline void vmathQMakeIdentity( VmathQuat *result )
+{   /* Sets result->vec128 to _VECTORMATH_UNIT_0001 (presumably (0,0,0,1); defined outside this header). */
+    result->vec128 = _VECTORMATH_UNIT_0001;
+}
+
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Linear interpolation: result = quat0 + t * (quat1 - quat0); the result is not normalized here. */
+    VmathQuat delta, scaledDelta;
+    vmathQSub( &delta, quat1, quat0 );
+    vmathQScalarMul( &scaledDelta, &delta, t );
+    vmathQAdd( result, quat0, &scaledDelta );
+}
+
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
+{   /* Blends unitQuat0 and unitQuat1 by t: sine-based weights when their cosine is below _VECTORMATH_SLERP_TOL, linear weights otherwise. */
+    VmathQuat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitQuat0->vec128, unitQuat1->vec128 );
+    cosAngle = vec_splat( cosAngle, 0 );   /* broadcast element 0 of the dot product */
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), cosAngle );   /* set where the cosine is negative */
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );   /* flip the cosine's sign in that case... */
+    start.vec128 = vec_sel( unitQuat0->vec128, negatef4( unitQuat0->vec128 ), selectMask );   /* ...and start from -unitQuat0 to match */
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );   /* set where cosAngle < tolerance */
+    angle = acosf4( cosAngle );
+    tttt = _vmathVfSplatScalar(t);   /* assumed to hold t in every element */
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );   /* (1, t, 1, t) */
+    angles = vec_mergeh( angles, oneMinusT );   /* (1, 1-t, t, 1-t) */
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );   /* scale each factor by the angle */
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) );   /* divide by sin(angle), held in element 0 */
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );   /* sin((1-t)*angle)/sin(angle) where masked, else 1-t */
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );   /* sin(t*angle)/sin(angle) where masked, else t */
+    result->vec128 = vec_madd( start.vec128, scale0, vec_madd( unitQuat1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );   /* start*scale0 + unitQuat1*scale1 */
+}
+
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
+{   /* Slerps the outer pair (0,3) and the inner pair (1,2) by t, then slerps those two results by 2t(1-t). */
+    VmathQuat outerBlend, innerBlend;
+    vmathQSlerp( &outerBlend, t, unitQuat0, unitQuat3 );
+    vmathQSlerp( &innerBlend, t, unitQuat1, unitQuat2 );
+    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &outerBlend, &innerBlend );
+}
+
+static inline vec_float4 vmathQGet128( const VmathQuat *quat )
+{   /* Returns the quaternion's underlying vec128 vector. */
+    return quat->vec128;
+}
+
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
+{   /* Takes vec's bits, except where _VECTORMATH_MASK_0x000F is set (presumably element 3, i.e. w), where result keeps its own. */
+    result->vec128 = vec_sel( vec->vec128, result->vec128, _VECTORMATH_MASK_0x000F );
+}
+
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
+{   /* Copies the whole vec128 (all four elements, not just x/y/z) into the vector. */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathQSetX( VmathQuat *result, float _x )
+{   /* Sets element 0 of result->vec128 to _x via _vmathVfSetElement. */
+    _vmathVfSetElement(result->vec128, _x, 0);
+}
+
+static inline float vmathQGetX( const VmathQuat *quat )
+{   /* Returns element 0 of quat->vec128 via _vmathVfGetElement. */
+    return _vmathVfGetElement(quat->vec128, 0);
+}
+
+static inline void vmathQSetY( VmathQuat *result, float _y )
+{   /* Sets element 1 of result->vec128 to _y via _vmathVfSetElement. */
+    _vmathVfSetElement(result->vec128, _y, 1);
+}
+
+static inline float vmathQGetY( const VmathQuat *quat )
+{   /* Returns element 1 of quat->vec128 via _vmathVfGetElement. */
+    return _vmathVfGetElement(quat->vec128, 1);
+}
+
+static inline void vmathQSetZ( VmathQuat *result, float _z )
+{   /* Sets element 2 of result->vec128 to _z via _vmathVfSetElement. */
+    _vmathVfSetElement(result->vec128, _z, 2);
+}
+
+static inline float vmathQGetZ( const VmathQuat *quat )
+{   /* Returns element 2 of quat->vec128 via _vmathVfGetElement. */
+    return _vmathVfGetElement(quat->vec128, 2);
+}
+
+static inline void vmathQSetW( VmathQuat *result, float _w )
+{   /* Sets element 3 of result->vec128 to _w via _vmathVfSetElement. */
+    _vmathVfSetElement(result->vec128, _w, 3);
+}
+
+static inline float vmathQGetW( const VmathQuat *quat )
+{   /* Returns element 3 of quat->vec128 via _vmathVfGetElement. */
+    return _vmathVfGetElement(quat->vec128, 3);
+}
+
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
+{   /* Sets element idx of result->vec128 to value; idx is passed to _vmathVfSetElement without a range check here. */
+    _vmathVfSetElement(result->vec128, value, idx);
+}
+
+static inline float vmathQGetElem( const VmathQuat *quat, int idx )
+{   /* Returns element idx of quat->vec128; idx is passed to _vmathVfGetElement without a range check here. */
+    return _vmathVfGetElement(quat->vec128, idx);
+}
+
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Element-wise sum: result = quat0 + quat1. */
+    result->vec128 = vec_add( quat0->vec128, quat1->vec128 );
+}
+
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Element-wise difference: result = quat0 - quat1. */
+    result->vec128 = vec_sub( quat0->vec128, quat1->vec128 );
+}
+
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
+{   /* Multiplies quat by the splatted scalar using a multiply-add with a zero addend. */
+    result->vec128 = vec_madd( quat->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
+{   /* Divides quat by the splatted scalar via divf4; a zero scalar is not checked for here. */
+    result->vec128 = divf4( quat->vec128, _vmathVfSplatScalar(scalar) );
+}
+
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
+{   /* Stores negatef4 of quat's vector, i.e. the negation of all four elements. */
+    result->vec128 = negatef4( quat->vec128 );
+}
+
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Return the 4-element dot product of quat0 and quat1 as a scalar. */
+    vec_float4 result = _vmathVfDot4( quat0->vec128, quat1->vec128 );
+    return _vmathVfGetElement(result, 0); /* only element 0 of the helper's vector result is read */
+}
+
+static inline float vmathQNorm( const VmathQuat *quat )
+{ /* Return the dot product of quat with itself; no square root is taken in this function. */
+    vec_float4 result = _vmathVfDot4( quat->vec128, quat->vec128 );
+    return _vmathVfGetElement(result, 0); /* only element 0 of the helper's vector result is read */
+}
+
+static inline float vmathQLength( const VmathQuat *quat )
+{ /* Return the square root of vmathQNorm(quat). */
+    return sqrtf( vmathQNorm( quat ) );
+}
+
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
+{ /* Write quat multiplied by rsqrtf4(dot(quat, quat)) into *result. */
+    vec_float4 dot = _vmathVfDot4( quat->vec128, quat->vec128 );
+    result->vec128 = vec_madd( quat->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* NOTE(review): presumably rsqrtf4 is a reciprocal square root; a zero quaternion is not special-cased here */
+}
+
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{ /* Build *result from the dot and cross products of unitVec0 and unitVec1 (per the name, the arc rotation from the first to the second). */
+    VmathVector3 crossVec, tmpV3_0;
+    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 ); /* equals cos(angle) only if both inputs are unit length, as the parameter names require */
+    cosAngle = vec_splat( cosAngle, 0 ); /* replicate element 0 into all four elements */
+    cosAngleX2Plus2 = vec_madd( cosAngle, ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){2.0f,2.0f,2.0f,2.0f}) ); /* 2*cosAngle + 2 */
+    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 ); /* NOTE(review): the argument is 0 when cosAngle == -1 (opposite vectors); no special case is visible here */
+    cosHalfAngleX2 = vec_madd( recipCosHalfAngleX2, cosAngleX2Plus2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* x * rsqrtf4(x); i.e. sqrt(x) assuming rsqrtf4 is a reciprocal square root */
+    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
+    crossVec = tmpV3_0;
+    res = vec_madd( crossVec.vec128, recipCosHalfAngleX2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* cross product scaled by recipCosHalfAngleX2 */
+    res = vec_sel( res, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), _VECTORMATH_MASK_0x000F ); /* take 0.5*cosHalfAngleX2 where the mask is set -- presumably the W element; mask is defined outside this view */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
+{ /* Build *result from unitVec scaled by s, with c placed under mask 0x000F; s and c come from sincosf4 of radians/2. */
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* half angle in every element */
+    sincosf4( angle, &s, &c ); /* presumably s = sin(angle), c = cos(angle) per element -- sincosf4 is defined outside this view */
+    res = vec_sel( vec_madd( unitVec->vec128, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c, _VECTORMATH_MASK_0x000F ); /* mask presumably selects the W element */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
+{ /* Build *result as zeros with s under mask 0xF000 and c under mask 0x000F; s and c come from sincosf4 of radians/2. */
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* half angle in every element */
+    sincosf4( angle, &s, &c ); /* presumably s = sin(angle), c = cos(angle) per element -- sincosf4 is defined outside this view */
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0xF000 ); /* mask presumably selects the X element */
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F ); /* mask presumably selects the W element */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
+{ /* Build *result as zeros with s under mask 0x0F00 and c under mask 0x000F; s and c come from sincosf4 of radians/2. */
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* half angle in every element */
+    sincosf4( angle, &s, &c ); /* presumably s = sin(angle), c = cos(angle) per element -- sincosf4 is defined outside this view */
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x0F00 ); /* mask presumably selects the Y element */
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F ); /* mask presumably selects the W element */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
+{ /* Build *result as zeros with s under mask 0x00F0 and c under mask 0x000F; s and c come from sincosf4 of radians/2. */
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* half angle in every element */
+    sincosf4( angle, &s, &c ); /* presumably s = sin(angle), c = cos(angle) per element -- sincosf4 is defined outside this view */
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x00F0 ); /* mask presumably selects the Z element */
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F ); /* mask presumably selects the W element */
+    result->vec128 = res;
+}
+
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Write the quaternion product quat0 * quat1 into *result; operand order matters because of the cross-product term. */
+    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = quat0->vec128;
+    rdata = quat1->vec128;
+    tmp0 = vec_perm( ldata, ldata, _VECTORMATH_PERM_YZXW ); /* permute constants are defined outside this view; names suggest (y,z,x,w) and (z,x,y,w) orderings */
+    tmp1 = vec_perm( rdata, rdata, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( ldata, ldata, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( rdata, rdata, _VECTORMATH_PERM_YZXW );
+    qv = vec_madd( vec_splat( ldata, 3 ), rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* left.w * right */
+    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv ); /* + right.w * left */
+    qv = vec_madd( tmp0, tmp1, qv ); /* + tmp0 * tmp1 */
+    qv = vec_nmsub( tmp2, tmp3, qv ); /* - tmp2 * tmp3 (vec_nmsub yields c - a*b); with the previous line this adds cross(left, right) in x,y,z */
+    product = vec_madd( ldata, rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* element-wise left * right */
+    l_wxyz = vec_sld( ldata, ldata, 12 ); /* rotate by 12 bytes: (w,x,y,z) */
+    r_wxyz = vec_sld( rdata, rdata, 12 );
+    qw = vec_nmsub( l_wxyz, r_wxyz, product ); /* last element: w*w - z*z */
+    xy = vec_madd( l_wxyz, r_wxyz, product ); /* element 1: x*x + y*y */
+    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) ); /* last element becomes w*w - x*x - y*y - z*z (each a left*right pair) */
+    result->vec128 = vec_sel( qv, qw, _VECTORMATH_MASK_0x000F ); /* keep qv except where the mask is set (presumably W), which takes qw */
+}
+
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
+{ /* Write into *result the vector vec transformed by quat; per the name this is a rotation, presumably assuming quat is unit length. */
+    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat->vec128;
+    vdata = vec->vec128;
+    tmp0 = vec_perm( qdata, qdata, _VECTORMATH_PERM_YZXW ); /* permute constants are defined outside this view; names suggest (y,z,x,w) and (z,x,y,w) orderings */
+    tmp1 = vec_perm( vdata, vdata, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( qdata, qdata, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( vdata, vdata, _VECTORMATH_PERM_YZXW );
+    wwww = vec_splat( qdata, 3 ); /* quat.w in every element */
+    qv = vec_madd( wwww, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* w * vec */
+    qv = vec_madd( tmp0, tmp1, qv ); /* + tmp0 * tmp1 */
+    qv = vec_nmsub( tmp2, tmp3, qv ); /* - tmp2 * tmp3 (vec_nmsub yields c - a*b); together: w*vec + cross(quat.xyz, vec) in x,y,z */
+    product = vec_madd( qdata, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* element-wise quat * vec */
+    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product ); /* element 0: x*x' + y*y' */
+    qw = vec_add( vec_sld( product, product, 8 ), qw ); /* element 0: + z*z', i.e. the 3-element dot product */
+    tmp1 = vec_perm( qv, qv, _VECTORMATH_PERM_ZXYW ); /* tmp1/tmp3 are reused for permutations of qv */
+    tmp3 = vec_perm( qv, qv, _VECTORMATH_PERM_YZXW );
+    res = vec_madd( vec_splat( qw, 0 ), qdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* dot * quat (only element 0 of qw is used) */
+    res = vec_madd( wwww, qv, res ); /* + w * qv */
+    res = vec_madd( tmp0, tmp1, res ); /* + tmp0 * tmp1 */
+    res = vec_nmsub( tmp2, tmp3, res ); /* - tmp2 * tmp3; the last two lines add cross(quat.xyz, qv) in x,y,z */
+    result->vec128 = res;
+}
+
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
+{ /* Write quat with the sign of its first three elements flipped into *result; the fourth element is unchanged. */
+    result->vec128 = vec_xor( quat->vec128, ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ); /* 0x80000000 is the float sign bit; XOR toggles it */
+}
+
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
+{ /* Branch-free select: *result gets quat1 when select1 is nonzero, otherwise quat0. */
+    unsigned int tmp;
+    tmp = (unsigned int)-(select1 > 0); /* 0xFFFFFFFF when select1 != 0, else 0 */
+    result->vec128 = vec_sel( quat0->vec128, quat1->vec128, _vmathVuiSplatScalar(tmp) ); /* mask bits set -> take quat1's bits */
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint( const VmathQuat *quat )
+{ /* Debug helper (inside #ifdef _VECTORMATH_DEBUG): print the four elements of quat with printf, one line. */
+    union { vec_float4 v; float s[4]; } tmp; /* lets the vector be read back as four floats */
+    tmp.v = quat->vec128;
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+static inline void vmathQPrints( const VmathQuat *quat, const char *name )
+{ /* Debug helper (inside #ifdef _VECTORMATH_DEBUG): like vmathQPrint, but prefixes the line with "name: ". */
+    union { vec_float4 v; float s[4]; } tmp; /* lets the vector be read back as four floats */
+    tmp.v = quat->vec128;
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos_v.h
index cc519d805..04cf6ccf9 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos_v.h
@@ -1,312 +1,312 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_V_C_H
-#define _VECTORMATH_QUAT_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
-{
-    VmathQuat result;
-    vmathQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
-{
-    VmathQuat result;
-    vmathQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 )
-{
-    VmathQuat result;
-    vmathQMakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeIdentity_V( )
-{
-    VmathQuat result;
-    vmathQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
-{
-    VmathQuat result;
-    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
-{
-    VmathQuat result;
-    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline vec_float4 vmathQGet128_V( VmathQuat quat )
-{
-    return vmathQGet128(&quat);
-}
-
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
-{
-    vmathQSetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
-{
-    VmathVector3 result;
-    vmathQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathQSetX_V( VmathQuat *result, float _x )
-{
-    vmathQSetX(result, _x);
-}
-
-static inline float vmathQGetX_V( VmathQuat quat )
-{
-    return vmathQGetX(&quat);
-}
-
-static inline void vmathQSetY_V( VmathQuat *result, float _y )
-{
-    vmathQSetY(result, _y);
-}
-
-static inline float vmathQGetY_V( VmathQuat quat )
-{
-    return vmathQGetY(&quat);
-}
-
-static inline void vmathQSetZ_V( VmathQuat *result, float _z )
-{
-    vmathQSetZ(result, _z);
-}
-
-static inline float vmathQGetZ_V( VmathQuat quat )
-{
-    return vmathQGetZ(&quat);
-}
-
-static inline void vmathQSetW_V( VmathQuat *result, float _w )
-{
-    vmathQSetW(result, _w);
-}
-
-static inline float vmathQGetW_V( VmathQuat quat )
-{
-    return vmathQGetW(&quat);
-}
-
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
-{
-    vmathQSetElem(result, idx, value);
-}
-
-static inline float vmathQGetElem_V( VmathQuat quat, int idx )
-{
-    return vmathQGetElem(&quat, idx);
-}
-
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQNeg_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNeg(&result, &quat);
-    return result;
-}
-
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    return vmathQDot(&quat0, &quat1);
-}
-
-static inline float vmathQNorm_V( VmathQuat quat )
-{
-    return vmathQNorm(&quat);
-}
-
-static inline float vmathQLength_V( VmathQuat quat )
-{
-    return vmathQLength(&quat);
-}
-
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathQuat result;
-    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathQuat result;
-    vmathQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationX_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationY_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationZ_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQConj_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
-{
-    VmathQuat result;
-    vmathQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint_V( VmathQuat quat )
-{
-    vmathQPrint(&quat);
-}
-
-static inline void vmathQPrints_V( VmathQuat quat, const char *name )
-{
-    vmathQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_V_C_H
+#define _VECTORMATH_QUAT_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
+{ /* By-value form of vmathQMakeFromElems: returns the quaternion that function writes for (_x, _y, _z, _w). */
+    VmathQuat result;
+    vmathQMakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{ /* By-value form of vmathQMakeFromV3Scalar: passes the address of the local copy of xyz and returns the result. */
+    VmathQuat result;
+    vmathQMakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
+{ /* By-value form of vmathQMakeFromV4: passes the address of the local copy of vec and returns the result. */
+    VmathQuat result;
+    vmathQMakeFromV4(&result, &vec);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
+{ /* By-value form of vmathQMakeFromScalar: returns the quaternion that function writes for scalar. */
+    VmathQuat result;
+    vmathQMakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 )
+{ /* By-value form of vmathQMakeFrom128: returns the quaternion that function writes for the raw vector vf4. */
+    VmathQuat result;
+    vmathQMakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeIdentity_V( void ) /* explicit void: a true prototype in C, so calls with stray arguments are diagnosed */
+{ /* By-value form of vmathQMakeIdentity: returns the quaternion that function writes. */
+    VmathQuat result;
+    vmathQMakeIdentity(&result);
+    return result;
+}
+
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQLerp: passes the addresses of the local copies of quat0/quat1 and returns the result. */
+    VmathQuat result;
+    vmathQLerp(&result, t, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
+{ /* By-value form of vmathQSlerp: passes the addresses of the local copies of both quaternions and returns the result. */
+    VmathQuat result;
+    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
+{ /* By-value form of vmathQSquad: passes the addresses of the four local quaternion copies and returns the result. */
+    VmathQuat result;
+    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
+    return result;
+}
+
+static inline vec_float4 vmathQGet128_V( VmathQuat quat )
+{ /* By-value form of vmathQGet128: returns what that function returns for the local copy of quat. */
+    return vmathQGet128(&quat);
+}
+
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
+{ /* Forwards to vmathQSetXYZ; result is still modified through the pointer, only vec is taken by value. */
+    vmathQSetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
+{ /* By-value form of vmathQGetXYZ: returns the VmathVector3 that function writes for the local copy of quat. */
+    VmathVector3 result;
+    vmathQGetXYZ(&result, &quat);
+    return result;
+}
+
+static inline void vmathQSetX_V( VmathQuat *result, float _x )
+{ /* Forwards unchanged to vmathQSetX; result is modified through the pointer. */
+    vmathQSetX(result, _x);
+}
+
+static inline float vmathQGetX_V( VmathQuat quat )
+{ /* By-value form of vmathQGetX: returns its result for the local copy of quat. */
+    return vmathQGetX(&quat);
+}
+
+static inline void vmathQSetY_V( VmathQuat *result, float _y )
+{ /* Forwards unchanged to vmathQSetY; result is modified through the pointer. */
+    vmathQSetY(result, _y);
+}
+
+static inline float vmathQGetY_V( VmathQuat quat )
+{ /* By-value form of vmathQGetY: returns its result for the local copy of quat. */
+    return vmathQGetY(&quat);
+}
+
+static inline void vmathQSetZ_V( VmathQuat *result, float _z )
+{ /* Forwards unchanged to vmathQSetZ; result is modified through the pointer. */
+    vmathQSetZ(result, _z);
+}
+
+static inline float vmathQGetZ_V( VmathQuat quat )
+{ /* By-value form of vmathQGetZ: returns its result for the local copy of quat. */
+    return vmathQGetZ(&quat);
+}
+
+static inline void vmathQSetW_V( VmathQuat *result, float _w )
+{ /* Forwards unchanged to vmathQSetW; result is modified through the pointer. */
+    vmathQSetW(result, _w);
+}
+
+static inline float vmathQGetW_V( VmathQuat quat )
+{ /* By-value form of vmathQGetW: returns its result for the local copy of quat. */
+    return vmathQGetW(&quat);
+}
+
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
+{ /* Forwards unchanged to vmathQSetElem; result is modified through the pointer. */
+    vmathQSetElem(result, idx, value);
+}
+
+static inline float vmathQGetElem_V( VmathQuat quat, int idx )
+{ /* By-value form of vmathQGetElem: returns its result for element idx of the local copy of quat. */
+    return vmathQGetElem(&quat, idx);
+}
+
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQAdd: returns the quaternion that function writes for quat0 and quat1. */
+    VmathQuat result;
+    vmathQAdd(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQSub: returns the quaternion that function writes for quat0 and quat1. */
+    VmathQuat result;
+    vmathQSub(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
+{ /* By-value form of vmathQScalarMul: returns the quaternion that function writes for quat and scalar. */
+    VmathQuat result;
+    vmathQScalarMul(&result, &quat, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
+{ /* By-value form of vmathQScalarDiv: returns the quaternion that function writes for quat and scalar. */
+    VmathQuat result;
+    vmathQScalarDiv(&result, &quat, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQNeg_V( VmathQuat quat )
+{ /* By-value form of vmathQNeg: returns the quaternion that function writes for quat. */
+    VmathQuat result;
+    vmathQNeg(&result, &quat);
+    return result;
+}
+
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQDot: returns its result for the local copies of quat0 and quat1. */
+    return vmathQDot(&quat0, &quat1);
+}
+
+static inline float vmathQNorm_V( VmathQuat quat )
+{ /* By-value form of vmathQNorm: returns its result for the local copy of quat. */
+    return vmathQNorm(&quat);
+}
+
+static inline float vmathQLength_V( VmathQuat quat )
+{ /* By-value form of vmathQLength: returns its result for the local copy of quat. */
+    return vmathQLength(&quat);
+}
+
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
+{ /* By-value form of vmathQNormalize: returns the quaternion that function writes for quat. */
+    VmathQuat result;
+    vmathQNormalize(&result, &quat);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{ /* By-value form of vmathQMakeRotationArc: returns the quaternion that function writes for the two vectors. */
+    VmathQuat result;
+    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{ /* By-value form of vmathQMakeRotationAxis: returns the quaternion that function writes for radians and unitVec. */
+    VmathQuat result;
+    vmathQMakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationX_V( float radians )
+{ /* By-value form of vmathQMakeRotationX: returns the quaternion that function writes for radians. */
+    VmathQuat result;
+    vmathQMakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationY_V( float radians )
+{ /* By-value form of vmathQMakeRotationY: returns the quaternion that function writes for radians. */
+    VmathQuat result;
+    vmathQMakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationZ_V( float radians )
+{ /* By-value form of vmathQMakeRotationZ: returns the quaternion that function writes for radians. */
+    VmathQuat result;
+    vmathQMakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQMul: returns the quaternion that function writes; argument order is passed through unchanged. */
+    VmathQuat result;
+    vmathQMul(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
+{ /* By-value form of vmathQRotate: returns the VmathVector3 that function writes for quat and vec. */
+    VmathVector3 result;
+    vmathQRotate(&result, &quat, &vec);
+    return result;
+}
+
+static inline VmathQuat vmathQConj_V( VmathQuat quat )
+{ /* By-value form of vmathQConj: returns the quaternion that function writes for quat. */
+    VmathQuat result;
+    vmathQConj(&result, &quat);
+    return result;
+}
+
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
+{ /* By-value form of vmathQSelect: returns the quaternion that function writes for quat0, quat1 and select1. */
+    VmathQuat result;
+    vmathQSelect(&result, &quat0, &quat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint_V( VmathQuat quat )
+{ /* By-value form of vmathQPrint (inside #ifdef _VECTORMATH_DEBUG): prints the local copy of quat. */
+    vmathQPrint(&quat);
+}
+
+static inline void vmathQPrints_V( VmathQuat quat, const char *name )
+{ /* By-value form of vmathQPrints (inside #ifdef _VECTORMATH_DEBUG): prints the local copy of quat labelled with name. */
+    vmathQPrints(&quat, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa.h
index 497a262fe..334c17b01 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa.h
@@ -1,415 +1,415 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_C_H
-#define _VECTORMATH_QUAT_SOA_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 _w )
-{
-    vmathSoaQSetXYZ( result, xyz );
-    vmathSoaQSetW( result, _w );
-}
-
-static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat )
-{
-    vec_float4 vec128 = quat->vec128;
-    result->x = vec_splat( vec128, 0 );
-    result->y = vec_splat( vec128, 1 );
-    result->z = vec_splat( vec128, 2 );
-    result->w = vec_splat( vec128, 3 );
-}
-
-static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( quat0->vec128, quat2->vec128 );
-    tmp1 = vec_mergeh( quat1->vec128, quat3->vec128 );
-    tmp2 = vec_mergel( quat0->vec128, quat2->vec128 );
-    tmp3 = vec_mergel( quat1->vec128, quat3->vec128 );
-    result->x = vec_mergeh( tmp0, tmp1 );
-    result->y = vec_mergel( tmp0, tmp1 );
-    result->z = vec_mergeh( tmp2, tmp3 );
-    result->w = vec_mergel( tmp2, tmp3 );
-}
-
-static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result )
-{
-    vmathSoaQMakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    VmathSoaQuat tmpQ_0, tmpQ_1;
-    vmathSoaQSub( &tmpQ_0, quat1, quat0 );
-    vmathSoaQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathSoaQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 )
-{
-    VmathSoaQuat start, tmpQ_0, tmpQ_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaQDot( unitQuat0, unitQuat1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){0.0f,0.0f,0.0f,0.0f}, cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    vmathSoaQSetX( &start, vec_sel( unitQuat0->x, negatef4( unitQuat0->x ), selectMask ) );
-    vmathSoaQSetY( &start, vec_sel( unitQuat0->y, negatef4( unitQuat0->y ), selectMask ) );
-    vmathSoaQSetZ( &start, vec_sel( unitQuat0->z, negatef4( unitQuat0->z ), selectMask ) );
-    vmathSoaQSetW( &start, vec_sel( unitQuat0->w, negatef4( unitQuat0->w ), selectMask ) );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    vmathSoaQScalarMul( &tmpQ_0, &start, scale0 );
-    vmathSoaQScalarMul( &tmpQ_1, unitQuat1, scale1 );
-    vmathSoaQAdd( result, &tmpQ_0, &tmpQ_1 );
-}
-
-static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 )
-{
-    VmathSoaQuat tmp0, tmp1;
-    vmathSoaQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathSoaQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathSoaQSlerp( result, vec_madd( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), t, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), &tmp0, &tmp1 );
-}
-
-static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( quat->x, quat->z );
-    tmp1 = vec_mergeh( quat->y, quat->w );
-    tmp2 = vec_mergel( quat->x, quat->z );
-    tmp3 = vec_mergel( quat->y, quat->w );
-    vmathQMakeFrom128( result0, vec_mergeh( tmp0, tmp1 ) );
-    vmathQMakeFrom128( result1, vec_mergel( tmp0, tmp1 ) );
-    vmathQMakeFrom128( result2, vec_mergeh( tmp2, tmp3 ) );
-    vmathQMakeFrom128( result3, vec_mergel( tmp2, tmp3 ) );
-}
-
-static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat )
-{
-    vmathSoaV3MakeFromElems( result, quat->x, quat->y, quat->z );
-}
-
-static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat )
-{
-    return quat->x;
-}
-
-static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat )
-{
-    return quat->y;
-}
-
-static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat )
-{
-    return quat->z;
-}
-
-static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 _w )
-{
-    result->w = _w;
-}
-
-static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat )
-{
-    return quat->w;
-}
-
-static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx )
-{
-    return *(&quat->x + idx);
-}
-
-static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    result->x = vec_add( quat0->x, quat1->x );
-    result->y = vec_add( quat0->y, quat1->y );
-    result->z = vec_add( quat0->z, quat1->z );
-    result->w = vec_add( quat0->w, quat1->w );
-}
-
-static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    result->x = vec_sub( quat0->x, quat1->x );
-    result->y = vec_sub( quat0->y, quat1->y );
-    result->z = vec_sub( quat0->z, quat1->z );
-    result->w = vec_sub( quat0->w, quat1->w );
-}
-
-static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
-{
-    result->x = vec_madd( quat->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( quat->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( quat->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( quat->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
-{
-    result->x = divf4( quat->x, scalar );
-    result->y = divf4( quat->y, scalar );
-    result->z = divf4( quat->z, scalar );
-    result->w = divf4( quat->w, scalar );
-}
-
-static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    result->x = negatef4( quat->x );
-    result->y = negatef4( quat->y );
-    result->z = negatef4( quat->z );
-    result->w = negatef4( quat->w );
-}
-
-static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    vec_float4 result;
-    result = vec_madd( quat0->x, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( quat0->y, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat0->z, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat0->w, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat )
-{
-    vec_float4 result;
-    result = vec_madd( quat->x, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( quat->y, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat->z, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat->w, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat )
-{
-    return sqrtf4( vmathSoaQNorm( quat ) );
-}
-
-static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaQNorm( quat );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    result->x = vec_madd( quat->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( quat->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( quat->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( quat->w, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf4( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), vec_add( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot( unitVec0, unitVec1 ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    recipCosHalfAngleX2 = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), cosHalfAngleX2 );
-    vmathSoaV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, recipCosHalfAngleX2 );
-    vmathSoaQMakeFromV3Scalar( result, &tmpV3_1, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    VmathSoaVector3 tmpV3_0;
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    vmathSoaV3ScalarMul( &tmpV3_0, unitVec, s );
-    vmathSoaQMakeFromV3Scalar( result, &tmpV3_0, c );
-}
-
-static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, c );
-}
-
-static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->x, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->y, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->x, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->z, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->x, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpW = vec_sub( vec_sub( vec_sub( vec_madd( quat0->w, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->x, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *quat, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = vec_sub( vec_add( vec_madd( quat->w, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_sub( vec_add( vec_madd( quat->w, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_sub( vec_add( vec_madd( quat->w, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpW = vec_add( vec_add( vec_madd( quat->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->x = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpX, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->y = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpY, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->z = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpZ, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    vmathSoaQMakeFromElems( result, negatef4( quat->x ), negatef4( quat->y ), negatef4( quat->z ), quat->w );
-}
-
-static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 )
-{
-    result->x = vec_sel( quat0->x, quat1->x, select1 );
-    result->y = vec_sel( quat0->y, quat1->y, select1 );
-    result->z = vec_sel( quat0->z, quat1->z, select1 );
-    result->w = vec_sel( quat0->w, quat1->w, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaQPrint( const VmathSoaQuat *quat )
-{
-    VmathQuat vec0, vec1, vec2, vec3;
-    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathQPrint( &vec0 );
-    printf("slot 1:\n");
-    vmathQPrint( &vec1 );
-    printf("slot 2:\n");
-    vmathQPrint( &vec2 );
-    printf("slot 3:\n");
-    vmathQPrint( &vec3 );
-}
-
-static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name )
-{
-    VmathQuat vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathQPrint( &vec0 );
-    printf("slot 1:\n");
-    vmathQPrint( &vec1 );
-    printf("slot 2:\n");
-    vmathQPrint( &vec2 );
-    printf("slot 3:\n");
-    vmathQPrint( &vec3 );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_C_H
+#define _VECTORMATH_QUAT_SOA_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+/* Copies the x, y, z and w members of *quat into *result. */
+static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat ) {
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+/* Stores the four given vectors as the x, y, z and w members of *result. */
+static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w ) {
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+/* Fills *result from a 3-vector (via vmathSoaQSetXYZ) and a separate w value (via vmathSoaQSetW). */
+static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 _w ) {
+    vmathSoaQSetXYZ(result, xyz);
+    vmathSoaQSetW(result, _w);
+}
+
+/* Copies the x, y, z and w members of the 4-vector *vec into *result. */
+static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec ) {
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+/* Sets every member (x, y, z, w) of *result to the same vector, scalar. */
+static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar ) {
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+/* Replicates one AoS quaternion across the SoA form: element i of quat->vec128 is splatted into member i. */
+static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat ) {
+    vec_float4 packed = quat->vec128;
+    result->x = vec_splat(packed, 0);
+    result->y = vec_splat(packed, 1);
+    result->z = vec_splat(packed, 2);
+    result->w = vec_splat(packed, 3);
+}
+
+/* Packs four AoS quaternions into one SoA quaternion with two rounds of vec_mergeh/vec_mergel (a 4x4 transpose). */
+static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 ) {
+    vec_float4 xy02, xy13, zw02, zw13;
+    xy02 = vec_mergeh(quat0->vec128, quat2->vec128);
+    xy13 = vec_mergeh(quat1->vec128, quat3->vec128);
+    zw02 = vec_mergel(quat0->vec128, quat2->vec128);
+    zw13 = vec_mergel(quat1->vec128, quat3->vec128);
+    result->x = vec_mergeh(xy02, xy13);
+    result->y = vec_mergel(xy02, xy13);
+    result->z = vec_mergeh(zw02, zw13);
+    result->w = vec_mergel(zw02, zw13);
+}
+
+/* Sets *result to x = y = z = 0 and w = 1 in all four lanes. */
+static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result ) {
+    vmathSoaQMakeFromElems(result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}));
+}
+
+/* Linear interpolation: *result = *quat0 + (*quat1 - *quat0) * t, with no renormalisation. */
+static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 ) {
+    VmathSoaQuat delta, step;
+    vmathSoaQSub(&delta, quat1, quat0);
+    vmathSoaQScalarMul(&step, &delta, t);
+    vmathSoaQAdd(result, quat0, &step);
+}
+
+/* Per-lane slerp: unitQuat0 is negated where the dot product is negative; lanes whose cosine is not below _VECTORMATH_SLERP_TOL use weights (1-t, t). */
+static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 ) {
+    VmathSoaQuat from, part0, part1;
+    vec_float4 invSin, weight0, weight1, cosTheta, theta;
+    vec_uint4 flipMask, slerpMask;
+    cosTheta = vmathSoaQDot(unitQuat0, unitQuat1);
+    flipMask = (vec_uint4)vec_cmpgt( (vec_float4){0.0f,0.0f,0.0f,0.0f}, cosTheta );
+    cosTheta = vec_sel(cosTheta, negatef4(cosTheta), flipMask);
+    from.x = vec_sel(unitQuat0->x, negatef4(unitQuat0->x), flipMask);
+    from.y = vec_sel(unitQuat0->y, negatef4(unitQuat0->y), flipMask);
+    from.z = vec_sel(unitQuat0->z, negatef4(unitQuat0->z), flipMask);
+    from.w = vec_sel(unitQuat0->w, negatef4(unitQuat0->w), flipMask);
+    slerpMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosTheta );
+    theta = acosf4(cosTheta);
+    invSin = divf4(((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4(theta));
+    weight0 = vec_sel(vec_sub(((vec_float4){1.0f,1.0f,1.0f,1.0f}), t), vec_madd(sinf4(vec_madd(vec_sub(((vec_float4){1.0f,1.0f,1.0f,1.0f}), t), theta, ((vec_float4){0.0f,0.0f,0.0f,0.0f}))), invSin, ((vec_float4){0.0f,0.0f,0.0f,0.0f})), slerpMask);
+    weight1 = vec_sel(t, vec_madd(sinf4(vec_madd(t, theta, ((vec_float4){0.0f,0.0f,0.0f,0.0f}))), invSin, ((vec_float4){0.0f,0.0f,0.0f,0.0f})), slerpMask);
+    vmathSoaQScalarMul(&part0, &from, weight0);
+    vmathSoaQScalarMul(&part1, unitQuat1, weight1);
+    vmathSoaQAdd(result, &part0, &part1);
+}
+
+/* Squad: slerps (unitQuat0, unitQuat3) and (unitQuat1, unitQuat2) at t, then slerps those two results at 2*t*(1-t). */
+static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 ) {
+    VmathSoaQuat outer, inner;
+    vmathSoaQSlerp(&outer, t, unitQuat0, unitQuat3);
+    vmathSoaQSlerp(&inner, t, unitQuat1, unitQuat2);
+    vmathSoaQSlerp(result, vec_madd(vec_madd(((vec_float4){2.0f,2.0f,2.0f,2.0f}), t, ((vec_float4){0.0f,0.0f,0.0f,0.0f})), vec_sub(((vec_float4){1.0f,1.0f,1.0f,1.0f}), t), ((vec_float4){0.0f,0.0f,0.0f,0.0f})), &outer, &inner);
+}
+
+/* Unpacks one SoA quaternion into four AoS quaternions (lane i goes to result<i>) via two rounds of vec_mergeh/vec_mergel. */
+static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 ) {
+    vec_float4 xz01, yw01, xz23, yw23;
+    xz01 = vec_mergeh(quat->x, quat->z);
+    yw01 = vec_mergeh(quat->y, quat->w);
+    xz23 = vec_mergel(quat->x, quat->z);
+    yw23 = vec_mergel(quat->y, quat->w);
+    vmathQMakeFrom128(result0, vec_mergeh(xz01, yw01));
+    vmathQMakeFrom128(result1, vec_mergel(xz01, yw01));
+    vmathQMakeFrom128(result2, vec_mergeh(xz23, yw23));
+    vmathQMakeFrom128(result3, vec_mergel(xz23, yw23));
+}
+
+/* Overwrites the x, y and z members of *result from *vec; the w member is left untouched. */
+static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec ) {
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+/* Builds *result from the x, y and z members of *quat via vmathSoaV3MakeFromElems. */
+static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat ) {
+    vmathSoaV3MakeFromElems(result, quat->x, quat->y, quat->z);
+}
+
+/* Stores _x into the x member of *result. */
+static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 _x ) {
+    result->x = _x;
+}
+
+/* Returns the x member of *quat. */
+static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat ) {
+    return quat->x;
+}
+
+/* Stores _y into the y member of *result. */
+static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 _y ) {
+    result->y = _y;
+}
+
+/* Returns the y member of *quat. */
+static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat ) {
+    return quat->y;
+}
+
+/* Stores _z into the z member of *result. */
+static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 _z ) {
+    result->z = _z;
+}
+
+/* Returns the z member of *quat. */
+static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat ) {
+    return quat->z;
+}
+
+/* Stores _w into the w member of *result. */
+static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 _w ) {
+    result->w = _w;
+}
+
+/* Returns the w member of *quat. */
+static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat ) {
+    return quat->w;
+}
+
+/* Writes value to the member idx positions after x (0 addresses x itself); idx is not range-checked. */
+static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value ) {
+    (&result->x)[idx] = value;
+}
+
+/* Reads the member idx positions after x (0 addresses x itself); idx is not range-checked. */
+static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx ) {
+    return (&quat->x)[idx];
+}
+
+/* Member-wise sum: each of x, y, z, w in *result is vec_add of the matching members of *quat0 and *quat1. */
+static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 ) {
+    result->x = vec_add(quat0->x, quat1->x);
+    result->y = vec_add(quat0->y, quat1->y);
+    result->z = vec_add(quat0->z, quat1->z);
+    result->w = vec_add(quat0->w, quat1->w);
+}
+
+/* Member-wise difference: each of x, y, z, w in *result is vec_sub( quat0 member, quat1 member ). */
+static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 ) {
+    result->x = vec_sub(quat0->x, quat1->x);
+    result->y = vec_sub(quat0->y, quat1->y);
+    result->z = vec_sub(quat0->z, quat1->z);
+    result->w = vec_sub(quat0->w, quat1->w);
+}
+
+/* Multiplies each member of *quat by scalar, using vec_madd with a zero addend. */
+static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar ) {
+    result->x = vec_madd(quat->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+    result->y = vec_madd(quat->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+    result->z = vec_madd(quat->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+    result->w = vec_madd(quat->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+}
+
+/* Divides each member of *quat by scalar using divf4; scalar is not checked for zero lanes. */
+static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar ) {
+    result->x = divf4(quat->x, scalar);
+    result->y = divf4(quat->y, scalar);
+    result->z = divf4(quat->z, scalar);
+    result->w = divf4(quat->w, scalar);
+}
+
+/* Applies negatef4 to each of the x, y, z and w members of *quat and stores the results in *result. */
+static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat ) {
+    result->x = negatef4(quat->x);
+    result->y = negatef4(quat->y);
+    result->z = negatef4(quat->z);
+    result->w = negatef4(quat->w);
+}
+
+/* Per-lane 4-component dot product of *quat0 and *quat1. Each product is formed by
+   vec_madd with a zero addend and the products are summed with vec_add in x, y, z, w order. */
+static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 ) {
+    vec_float4 sum = vec_madd(quat0->x, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+    sum = vec_add(sum, vec_madd(quat0->y, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f})));
+    sum = vec_add(sum, vec_madd(quat0->z, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f})));
+    sum = vec_add(sum, vec_madd(quat0->w, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f})));
+    return sum;
+}
+
+/* Per-lane sum of squares x*x + y*y + z*z + w*w of *quat. No square root is taken here;
+   vmathSoaQLength applies sqrtf4 to this value. */
+static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat ) {
+    vec_float4 sumSq = vec_madd(quat->x, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+    sumSq = vec_add(sumSq, vec_madd(quat->y, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f})));
+    sumSq = vec_add(sumSq, vec_madd(quat->z, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f})));
+    sumSq = vec_add(sumSq, vec_madd(quat->w, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f})));
+    return sumSq;
+}
+
+/* Returns sqrtf4 of vmathSoaQNorm( quat ), i.e. the per-lane length of the quaternion. */
+static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat ) {
+    return sqrtf4(vmathSoaQNorm(quat));
+}
+
+/* Scales every member of *quat by 1 / sqrt( vmathSoaQNorm( quat ) ) and stores the result
+   in *result. Lanes whose norm is zero receive no special handling. */
+static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat ) {
+    vec_float4 normSq = vmathSoaQNorm(quat);
+    vec_float4 invLen = divf4(((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4(normSq));
+    result->x = vec_madd(quat->x, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+    result->y = vec_madd(quat->y, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+    result->z = vec_madd(quat->z, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+    result->w = vec_madd(quat->w, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}));
+}
+
+/* With k = sqrt( 2 * (1 + dot(unitVec0, unitVec1)) ): xyz = cross(unitVec0, unitVec1) / k and w = k / 2. Lanes where k is zero are not special-cased. */
+static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 ) {
+    VmathSoaVector3 crossVec, scaledCross;
+    vec_float4 twoCosHalf, invTwoCosHalf;
+    twoCosHalf = sqrtf4(vec_madd(((vec_float4){2.0f,2.0f,2.0f,2.0f}), vec_add(((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot(unitVec0, unitVec1)), ((vec_float4){0.0f,0.0f,0.0f,0.0f})));
+    invTwoCosHalf = divf4(((vec_float4){1.0f,1.0f,1.0f,1.0f}), twoCosHalf);
+    vmathSoaV3Cross(&crossVec, unitVec0, unitVec1);
+    vmathSoaV3ScalarMul(&scaledCross, &crossVec, invTwoCosHalf);
+    vmathSoaQMakeFromV3Scalar(result, &scaledCross, vec_madd(twoCosHalf, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f})));
+}
+
+static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    /* xyz = unitVec scaled by the sine of radians/2, w = the cosine of radians/2. */
+    VmathSoaVector3 scaledAxis;
+    vec_float4 sinHalf, cosHalf;
+    sincosf4( vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), &sinHalf, &cosHalf );
+    vmathSoaV3ScalarMul( &scaledAxis, unitVec, sinHalf );
+    vmathSoaQMakeFromV3Scalar( result, &scaledAxis, cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians )
+{
+    /* Elements passed on: ( sin(radians/2), 0, 0, cos(radians/2) ). */
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}), sinHalf, cosHalf;
+    sincosf4( vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ), &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, sinHalf, zero, zero, cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians )
+{
+    /* Elements passed on: ( 0, sin(radians/2), 0, cos(radians/2) ). */
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}), sinHalf, cosHalf;
+    sincosf4( vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ), &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, zero, sinHalf, zero, cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians )
+{
+    /* Elements passed on: ( 0, 0, sin(radians/2), cos(radians/2) ). */
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}), sinHalf, cosHalf;
+    sincosf4( vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ), &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, zero, zero, sinHalf, cosHalf );
+}
+
+static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}); /* addend for every vec_madd; all four products are formed before result is written */
+    const vec_float4 prodX = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->x, zero ), vec_madd( quat0->x, quat1->w, zero ) ), vec_madd( quat0->y, quat1->z, zero ) ), vec_madd( quat0->z, quat1->y, zero ) );
+    const vec_float4 prodY = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->y, zero ), vec_madd( quat0->y, quat1->w, zero ) ), vec_madd( quat0->z, quat1->x, zero ) ), vec_madd( quat0->x, quat1->z, zero ) );
+    const vec_float4 prodZ = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->z, zero ), vec_madd( quat0->z, quat1->w, zero ) ), vec_madd( quat0->x, quat1->y, zero ) ), vec_madd( quat0->y, quat1->x, zero ) );
+    const vec_float4 prodW = vec_sub( vec_sub( vec_sub( vec_madd( quat0->w, quat1->w, zero ), vec_madd( quat0->x, quat1->x, zero ) ), vec_madd( quat0->y, quat1->y, zero ) ), vec_madd( quat0->z, quat1->z, zero ) );
+    vmathSoaQMakeFromElems( result, prodX, prodY, prodZ, prodW );
+}
+
+static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *quat, const VmathSoaVector3 *vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f}); /* first pass: quat->w*vec + cross(quat.xyz, vec) and dot(quat.xyz, vec); second pass reads only those and quat */
+    const vec_float4 crossX = vec_sub( vec_add( vec_madd( quat->w, vec->x, zero ), vec_madd( quat->y, vec->z, zero ) ), vec_madd( quat->z, vec->y, zero ) );
+    const vec_float4 crossY = vec_sub( vec_add( vec_madd( quat->w, vec->y, zero ), vec_madd( quat->z, vec->x, zero ) ), vec_madd( quat->x, vec->z, zero ) );
+    const vec_float4 crossZ = vec_sub( vec_add( vec_madd( quat->w, vec->z, zero ), vec_madd( quat->x, vec->y, zero ) ), vec_madd( quat->y, vec->x, zero ) );
+    const vec_float4 dotXYZ = vec_add( vec_add( vec_madd( quat->x, vec->x, zero ), vec_madd( quat->y, vec->y, zero ) ), vec_madd( quat->z, vec->z, zero ) );
+    result->x = vec_add( vec_sub( vec_add( vec_madd( dotXYZ, quat->x, zero ), vec_madd( crossX, quat->w, zero ) ), vec_madd( crossY, quat->z, zero ) ), vec_madd( crossZ, quat->y, zero ) );
+    result->y = vec_add( vec_sub( vec_add( vec_madd( dotXYZ, quat->y, zero ), vec_madd( crossY, quat->w, zero ) ), vec_madd( crossZ, quat->x, zero ) ), vec_madd( crossX, quat->z, zero ) );
+    result->z = vec_add( vec_sub( vec_add( vec_madd( dotXYZ, quat->z, zero ), vec_madd( crossZ, quat->w, zero ) ), vec_madd( crossX, quat->y, zero ) ), vec_madd( crossY, quat->x, zero ) );
+}
+
+static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{
+    vmathSoaQMakeFromElems( result, negatef4( quat->x ), negatef4( quat->y ), negatef4( quat->z ), quat->w ); /* x, y, z go through negatef4; w is passed unchanged */
+}
+
+static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 )
+{
+    result->x = vec_sel( quat0->x, quat1->x, select1 ); /* same mask for all four components; presumably set mask bits pick quat1 (AltiVec vec_sel) - confirm */
+    result->y = vec_sel( quat0->y, quat1->y, select1 );
+    result->z = vec_sel( quat0->z, quat1->z, select1 );
+    result->w = vec_sel( quat0->w, quat1->w, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaQPrint( const VmathSoaQuat *quat )
+{
+    VmathQuat q0, q1, q2, q3; /* filled by vmathSoaQGet4Aos, then printed under "slot N" labels */
+    vmathSoaQGet4Aos( quat, &q0, &q1, &q2, &q3 );
+    printf("slot 0:\n");
+    vmathQPrint( &q0 );
+    printf("slot 1:\n");
+    vmathQPrint( &q1 );
+    printf("slot 2:\n");
+    vmathQPrint( &q2 );
+    printf("slot 3:\n");
+    vmathQPrint( &q3 );
+}
+
+static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name )
+{
+    VmathQuat q0, q1, q2, q3; /* filled by vmathSoaQGet4Aos after the name header is printed */
+    printf( "%s:\n", name );
+    vmathSoaQGet4Aos( quat, &q0, &q1, &q2, &q3 );
+    printf("slot 0:\n");
+    vmathQPrint( &q0 );
+    printf("slot 1:\n");
+    vmathQPrint( &q1 );
+    printf("slot 2:\n");
+    vmathQPrint( &q2 );
+    printf("slot 3:\n");
+    vmathQPrint( &q3 );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa_v.h
index f51b43809..601d9da92 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa_v.h
@@ -1,319 +1,319 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_V_C_H
-#define _VECTORMATH_QUAT_SOA_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromAos(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFrom4Aos(&result, &quat0, &quat1, &quat2, &quat3);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeIdentity_V( )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
-{
-    vmathSoaQGet4Aos(&quat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec )
-{
-    vmathSoaQSetXYZ(result, &vec);
-}
-
-static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat )
-{
-    VmathSoaVector3 result;
-    vmathSoaQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 _x )
-{
-    vmathSoaQSetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetX(&quat);
-}
-
-static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 _y )
-{
-    vmathSoaQSetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetY(&quat);
-}
-
-static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 _z )
-{
-    vmathSoaQSetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetZ(&quat);
-}
-
-static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 _w )
-{
-    vmathSoaQSetW(result, _w);
-}
-
-static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetW(&quat);
-}
-
-static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value )
-{
-    vmathSoaQSetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx )
-{
-    return vmathSoaQGetElem(&quat, idx);
-}
-
-static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQNeg(&result, &quat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    return vmathSoaQDot(&quat0, &quat1);
-}
-
-static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat )
-{
-    return vmathSoaQNorm(&quat);
-}
-
-static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat )
-{
-    return vmathSoaQLength(&quat);
-}
-
-static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat quat, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaQPrint_V( VmathSoaQuat quat )
-{
-    vmathSoaQPrint(&quat);
-}
-
-static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name )
-{
-    vmathSoaQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_V_C_H
+#define _VECTORMATH_QUAT_SOA_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    VmathSoaQuat made; /* out-parameter for vmathSoaQMakeFromElems, returned by value */
+    vmathSoaQMakeFromElems( &made, _x, _y, _z, _w );
+    return made;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
+{
+    VmathSoaQuat made; /* out-parameter for vmathSoaQMakeFromV3Scalar, returned by value */
+    vmathSoaQMakeFromV3Scalar( &made, &xyz, _w );
+    return made;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec )
+{
+    VmathSoaQuat made; /* out-parameter for vmathSoaQMakeFromV4, returned by value */
+    vmathSoaQMakeFromV4( &made, &vec );
+    return made;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaQuat made; /* out-parameter for vmathSoaQMakeFromScalar, returned by value */
+    vmathSoaQMakeFromScalar( &made, scalar );
+    return made;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat )
+{
+    VmathSoaQuat soa; /* out-parameter for vmathSoaQMakeFromAos, returned by value */
+    vmathSoaQMakeFromAos( &soa, &quat );
+    return soa;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 )
+{
+    VmathSoaQuat soa; /* out-parameter for vmathSoaQMakeFrom4Aos, returned by value */
+    vmathSoaQMakeFrom4Aos( &soa, &quat0, &quat1, &quat2, &quat3 );
+    return soa;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeIdentity_V( void )
+{
+    VmathSoaQuat identity; /* out-parameter for vmathSoaQMakeIdentity; "( void )" makes this a real prototype in C */
+    vmathSoaQMakeIdentity( &identity );
+    return identity;
+}
+
+static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{
+    VmathSoaQuat blended; /* out-parameter for vmathSoaQLerp, returned by value */
+    vmathSoaQLerp( &blended, t, &quat0, &quat1 );
+    return blended;
+}
+
+static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 )
+{
+    VmathSoaQuat blended; /* out-parameter for vmathSoaQSlerp, returned by value */
+    vmathSoaQSlerp( &blended, t, &unitQuat0, &unitQuat1 );
+    return blended;
+}
+
+static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 )
+{
+    VmathSoaQuat blended; /* out-parameter for vmathSoaQSquad, returned by value */
+    vmathSoaQSquad( &blended, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3 );
+    return blended;
+}
+
+static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
+{
+    vmathSoaQGet4Aos(&quat, result0, result1, result2, result3); /* by-value quat forwarded by address; the four result pointers pass straight through */
+}
+
+static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec )
+{
+    vmathSoaQSetXYZ(result, &vec); /* by-value vec forwarded by address; result is the caller's quaternion */
+}
+
+static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat )
+{
+    VmathSoaVector3 xyz; /* out-parameter for vmathSoaQGetXYZ, returned by value */
+    vmathSoaQGetXYZ( &xyz, &quat );
+    return xyz;
+}
+
+static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 _x )
+{
+    vmathSoaQSetX(result, _x); /* straight pass-through to vmathSoaQSetX */
+}
+
+static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat )
+{
+    return vmathSoaQGetX(&quat); /* by-value quat forwarded by address */
+}
+
+static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 _y )
+{
+    vmathSoaQSetY(result, _y); /* straight pass-through to vmathSoaQSetY */
+}
+
+static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat )
+{
+    return vmathSoaQGetY(&quat); /* by-value quat forwarded by address */
+}
+
+static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 _z )
+{
+    vmathSoaQSetZ(result, _z); /* straight pass-through to vmathSoaQSetZ */
+}
+
+static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat )
+{
+    return vmathSoaQGetZ(&quat); /* by-value quat forwarded by address */
+}
+
+static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 _w )
+{
+    vmathSoaQSetW(result, _w); /* straight pass-through to vmathSoaQSetW */
+}
+
+static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat )
+{
+    return vmathSoaQGetW(&quat); /* by-value quat forwarded by address */
+}
+
+static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value )
+{
+    vmathSoaQSetElem(result, idx, value); /* straight pass-through; idx is not range-checked here */
+}
+
+static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx )
+{
+    return vmathSoaQGetElem(&quat, idx); /* by-value quat forwarded by address; idx is not range-checked here */
+}
+
+static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{
+    VmathSoaQuat sum; /* out-parameter for vmathSoaQAdd, returned by value */
+    vmathSoaQAdd( &sum, &quat0, &quat1 );
+    return sum;
+}
+
+static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{
+    VmathSoaQuat diff; /* out-parameter for vmathSoaQSub, returned by value */
+    vmathSoaQSub( &diff, &quat0, &quat1 );
+    return diff;
+}
+
+static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar )
+{
+    VmathSoaQuat scaled; /* out-parameter for vmathSoaQScalarMul, returned by value */
+    vmathSoaQScalarMul( &scaled, &quat, scalar );
+    return scaled;
+}
+
+static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar )
+{
+    VmathSoaQuat scaled; /* out-parameter for vmathSoaQScalarDiv, returned by value */
+    vmathSoaQScalarDiv( &scaled, &quat, scalar );
+    return scaled;
+}
+
+static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat )
+{
+    VmathSoaQuat negated; /* out-parameter for vmathSoaQNeg, returned by value */
+    vmathSoaQNeg( &negated, &quat );
+    return negated;
+}
+
+static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{
+    return vmathSoaQDot(&quat0, &quat1); /* by-value operands forwarded by address */
+}
+
+static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat )
+{
+    return vmathSoaQNorm(&quat); /* by-value quat forwarded by address */
+}
+
+static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat )
+{
+    return vmathSoaQLength(&quat); /* by-value quat forwarded by address */
+}
+
+static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat )
+{
+    VmathSoaQuat unit; /* out-parameter for vmathSoaQNormalize, returned by value */
+    vmathSoaQNormalize( &unit, &quat );
+    return unit;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
+{
+    VmathSoaQuat rot; /* out-parameter for vmathSoaQMakeRotationArc, returned by value */
+    vmathSoaQMakeRotationArc( &rot, &unitVec0, &unitVec1 );
+    return rot;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaQuat rot; /* out-parameter for vmathSoaQMakeRotationAxis, returned by value */
+    vmathSoaQMakeRotationAxis( &rot, radians, &unitVec );
+    return rot;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaQuat rot; /* out-parameter for vmathSoaQMakeRotationX, returned by value */
+    vmathSoaQMakeRotationX( &rot, radians );
+    return rot;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaQuat rot; /* out-parameter for vmathSoaQMakeRotationY, returned by value */
+    vmathSoaQMakeRotationY( &rot, radians );
+    return rot;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaQuat rot; /* out-parameter for vmathSoaQMakeRotationZ, returned by value */
+    vmathSoaQMakeRotationZ( &rot, radians );
+    return rot;
+}
+
+static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{
+    VmathSoaQuat product; /* out-parameter for vmathSoaQMul, returned by value */
+    vmathSoaQMul( &product, &quat0, &quat1 );
+    return product;
+}
+
+static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat quat, VmathSoaVector3 vec )
+{
+    VmathSoaVector3 rotated; /* out-parameter for vmathSoaQRotate, returned by value */
+    vmathSoaQRotate( &rotated, &quat, &vec );
+    return rotated;
+}
+
+static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat )
+{
+    VmathSoaQuat conj; /* out-parameter for vmathSoaQConj, returned by value */
+    vmathSoaQConj( &conj, &quat );
+    return conj;
+}
+
+static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 )
+{
+    VmathSoaQuat chosen; /* out-parameter for vmathSoaQSelect, returned by value */
+    vmathSoaQSelect( &chosen, &quat0, &quat1, select1 );
+    return chosen;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaQPrint_V( VmathSoaQuat quat )
+{
+    vmathSoaQPrint(&quat); /* by-value quat forwarded by address; compiled only under _VECTORMATH_DEBUG */
+}
+
+static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name )
+{
+    vmathSoaQPrints(&quat, name); /* by-value quat forwarded by address, name passed through; compiled only under _VECTORMATH_DEBUG */
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos.h
index 256828410..58427a224 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos.h
@@ -1,1125 +1,1125 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_C_H
-#define _VECTORMATH_VEC_AOS_C_H
-#include <altivec.h>
-#include <simdmath.h>
-#include <stddef.h>
-#include "vec_types.h"
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for permutes words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
-#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
-#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
-#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
-#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
-#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
-#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
-#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
-    return vec_madd( vec_sld( vec0, vec0, 8 ), vec_sld( vec1, vec1, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
-    return vec_add( vec_sld( result, result, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( vec0, vec0, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( vec1, vec1, _VECTORMATH_PERM_YZXW );
-    result = vec_madd( tmp0, tmp1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_nmsub( tmp2, tmp3, result );
-    return result;
-}
-
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
-{
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = (vec_uint4){0x00007c00u,0x00007c00u,0x00007c00u,0x00007c00u};
-    const vec_uint4 mergeMant = (vec_uint4){0x000003ffu,0x000003ffu,0x000003ffu,0x000003ffu};
-    const vec_uint4 mergeSign = (vec_uint4){0x00008000u,0x00008000u,0x00008000u,0x00008000u};
-
-    sign = vec_sr((vec_uint4)v, (vec_uint4){16,16,16,16});
-    mant = vec_sr((vec_uint4)v, (vec_uint4){13,13,13,13});
-    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4){23,23,23,23}), (vec_int4){0xff,0xff,0xff,0xff});
-
-    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4){112,112,112,112});
-    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4){142,142,142,142});
-
-    bexp = vec_add(bexp, (vec_int4){-112,-112,-112,-112});
-    bexp = vec_sl(bexp, (vec_uint4){10,10,10,10});
-
-    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = vec_sel((vec_uint4){0,0,0,0}, hfloat, notZero);
-    hfloat = vec_sel(hfloat, hfloatInf, isInf);
-    hfloat = vec_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
-{
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
-}
-
-#ifndef __GNUC__
-#define __builtin_constant_p(x) 0
-#endif
-
-static inline vec_float4 _vmathVfInsert(vec_float4 dst, vec_float4 src, int slot)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(slot)) {
-        dst = vec_sld(dst, dst, slot<<2);
-        dst = vec_sld(dst, src, 4);
-        if (slot != 3) dst = vec_sld(dst, dst, (3-slot)<<2);
-        return dst;
-    } else
-#endif
-    {
-        vec_uchar16 shiftpattern = vec_lvsr( 0, (float *)(size_t)(slot<<2) );
-        vec_uint4 selectmask = (vec_uint4)vec_perm( (vec_uint4){0,0,0,0}, _VECTORMATH_MASK_0xF000, shiftpattern );
-        return vec_sel( dst, src, selectmask );
-    }
-}
-
-#define _vmathVfGetElement(vec, slot) ((float *)&(vec))[slot]
-#ifdef _VECTORMATH_SET_CONSTS_IN_MEM
-#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
-#else
-#define _vmathVfSetElement(vec, scalar, slot)                                            \
-{                                                                                        \
-    if (__builtin_constant_p(scalar)) {                                                  \
-        (vec) = _vmathVfInsert(vec, (vec_float4){scalar, scalar, scalar, scalar}, slot); \
-    } else {                                                                             \
-        ((float *)&(vec))[slot] = scalar;                                                \
-    }                                                                                    \
-}
-#endif
-
-static inline vec_float4 _vmathVfSplatScalar(float scalar)
-{
-    vec_float4 result;
-    if (__builtin_constant_p(scalar)) {
-        result = (vec_float4){scalar, scalar, scalar, scalar};
-    } else {
-        result = vec_ld(0, &scalar);
-        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
-    } 
-    return result;
-}
-
-static inline vec_uint4 _vmathVuiSplatScalar(unsigned int scalar)
-{
-    vec_uint4 result;
-    if (__builtin_constant_p(scalar)) {
-        result = (vec_uint4){scalar, scalar, scalar, scalar};
-    } else {
-        result = vec_ld(0, &scalar);
-        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
-    } 
-    return result;
-}
-
-#endif
-
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
-        result->vec128 = (vec_float4){_x, _y, _z, 0.0f};
-    } else {
-        float *pf = (float *)&result->vec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-    }
-}
-
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-}
-
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
-{
-    result->vec128 = _vmathVfSplatScalar(scalar);
-}
-
-static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathV3MakeXAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_1000;
-}
-
-static inline void vmathV3MakeYAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0100;
-}
-
-static inline void vmathV3MakeZAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0010;
-}
-
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = _vmathVfSplatScalar(t);
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    result->vec128 = vec_madd( unitVec0->vec128, scale0, vec_madd( unitVec1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
-{
-    return vec->vec128;
-}
-
-static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
-    dstVec = vec_sel(vec->vec128, dstVec, mask);
-    *quad = dstVec;
-}
-
-static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = vec_sld( xyzx, yzxy, 12 );
-    xyz2 = vec_sld( yzxy, zxyz, 8 );
-    xyz3 = vec_sld( zxyz, zxyz, 4 );
-    vec0->vec128 = xyzx;
-    vec1->vec128 = xyz1;
-    vec2->vec128 = xyz2;
-    vec3->vec128 = xyz3;
-}
-
-static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = vec_perm( vec0->vec128, vec1->vec128, _VECTORMATH_PERM_XYZA );
-    yzxy = vec_perm( vec1->vec128, vec2->vec128, _VECTORMATH_PERM_YZAB );
-    zxyz = vec_perm( vec2->vec128, vec3->vec128, _VECTORMATH_PERM_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathV3SetX( VmathVector3 *result, float _x )
-{
-    _vmathVfSetElement(result->vec128, _x, 0);
-}
-
-static inline float vmathV3GetX( const VmathVector3 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 0);
-}
-
-static inline void vmathV3SetY( VmathVector3 *result, float _y )
-{
-    _vmathVfSetElement(result->vec128, _y, 1);
-}
-
-static inline float vmathV3GetY( const VmathVector3 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 1);
-}
-
-static inline void vmathV3SetZ( VmathVector3 *result, float _z )
-{
-    _vmathVfSetElement(result->vec128, _z, 2);
-}
-
-static inline float vmathV3GetZ( const VmathVector3 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 2);
-}
-
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
-{
-    _vmathVfSetElement(result->vec128, value, idx);
-}
-
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
-{
-    return _vmathVfGetElement(vec->vec128, idx);
-}
-
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_add( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_sub( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = vec_add( vec->vec128, pnt1->vec128 );
-}
-
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->vec128 = vec_madd( vec->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->vec128 = divf4( vec->vec128, _vmathVfSplatScalar(scalar) );
-}
-
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = negatef4( vec->vec128 );
-}
-
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_madd( vec0->vec128, vec1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = recipf4( vec->vec128 );
-}
-
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = sqrtf4( vec->vec128 );
-}
-
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = rsqrtf4( vec->vec128 );
-}
-
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = fabsf4( vec->vec128 );
-}
-
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV3MaxElem( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = fmaxf4( vec_splat( vec->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV3MinElem( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = fminf4( vec_splat( vec->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV3Sum( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = vec_add( vec_splat( vec->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    vec_float4 result = _vmathVfDot3( vec0->vec128, vec1->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV3LengthSqr( const VmathVector3 *vec )
-{
-    vec_float4 result = _vmathVfDot3( vec->vec128, vec->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV3Length( const VmathVector3 *vec )
-{
-    return sqrtf( vmathV3LengthSqr( vec ) );
-}
-
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
-{
-    vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
-    dot = vec_splat( dot, 0 );
-    result->vec128 = vec_madd( vec->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
-{
-    unsigned int tmp;
-    tmp = (unsigned int)-(select1 > 0);
-    result->vec128 = vec_sel( vec0->vec128, vec1->vec128, _vmathVuiSplatScalar(tmp) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print( const VmathVector3 *vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
-        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
-        result->vec128 = (vec_float4){_x, _y, _z, _w};
-    } else {
-        float *pf = (float *)&result->vec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-        pf[3] = _w;
-    }
-}
-
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
-{
-    result->vec128 = xyz->vec128;
-    _vmathVfSetElement(result->vec128, _w, 3);
-}
-
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-    result->vec128 = _vmathVfInsert(result->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), 3);
-}
-
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-    result->vec128 = _vmathVfInsert(result->vec128, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), 3);
-}
-
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
-{
-    result->vec128 = _vmathVfSplatScalar(scalar);
-}
-
-static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathV4MakeXAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_1000;
-}
-
-static inline void vmathV4MakeYAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0100;
-}
-
-static inline void vmathV4MakeZAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0010;
-}
-
-static inline void vmathV4MakeWAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0001;
-}
-
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    VmathVector4 tmpV4_0, tmpV4_1;
-    vmathV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = _vmathVfSplatScalar(t);
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    result->vec128 = vec_madd( unitVec0->vec128, scale0, vec_madd( unitVec1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
-{
-    return vec->vec128;
-}
-
-static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);
-}
-
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec_sel( vec->vec128, result->vec128, _VECTORMATH_MASK_0x000F );
-}
-
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV4SetX( VmathVector4 *result, float _x )
-{
-    _vmathVfSetElement(result->vec128, _x, 0);
-}
-
-static inline float vmathV4GetX( const VmathVector4 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 0);
-}
-
-static inline void vmathV4SetY( VmathVector4 *result, float _y )
-{
-    _vmathVfSetElement(result->vec128, _y, 1);
-}
-
-static inline float vmathV4GetY( const VmathVector4 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 1);
-}
-
-static inline void vmathV4SetZ( VmathVector4 *result, float _z )
-{
-    _vmathVfSetElement(result->vec128, _z, 2);
-}
-
-static inline float vmathV4GetZ( const VmathVector4 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 2);
-}
-
-static inline void vmathV4SetW( VmathVector4 *result, float _w )
-{
-    _vmathVfSetElement(result->vec128, _w, 3);
-}
-
-static inline float vmathV4GetW( const VmathVector4 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 3);
-}
-
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
-{
-    _vmathVfSetElement(result->vec128, value, idx);
-}
-
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
-{
-    return _vmathVfGetElement(vec->vec128, idx);
-}
-
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = vec_add( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = vec_sub( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->vec128 = vec_madd( vec->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->vec128 = divf4( vec->vec128, _vmathVfSplatScalar(scalar) );
-}
-
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = negatef4( vec->vec128 );
-}
-
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = vec_madd( vec0->vec128, vec1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = recipf4( vec->vec128 );
-}
-
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = sqrtf4( vec->vec128 );
-}
-
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = rsqrtf4( vec->vec128 );
-}
-
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = fabsf4( vec->vec128 );
-}
-
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV4MaxElem( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = fmaxf4( vec_splat( vec->vec128, 2 ), result );
-    result = fmaxf4( vec_splat( vec->vec128, 3 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV4MinElem( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = fminf4( vec_splat( vec->vec128, 2 ), result );
-    result = fminf4( vec_splat( vec->vec128, 3 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV4Sum( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = vec_add( vec_splat( vec->vec128, 2 ), result );
-    result = vec_add( vec_splat( vec->vec128, 3 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    vec_float4 result = _vmathVfDot4( vec0->vec128, vec1->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV4LengthSqr( const VmathVector4 *vec )
-{
-    vec_float4 result = _vmathVfDot4( vec->vec128, vec->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV4Length( const VmathVector4 *vec )
-{
-    return sqrtf( vmathV4LengthSqr( vec ) );
-}
-
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
-{
-    vec_float4 dot = _vmathVfDot4( vec->vec128, vec->vec128 );
-    result->vec128 = vec_madd( vec->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
-{
-    unsigned int tmp;
-    tmp = (unsigned int)-(select1 > 0);
-    result->vec128 = vec_sel( vec0->vec128, vec1->vec128, _vmathVuiSplatScalar(tmp) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print( const VmathVector4 *vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-}
-
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
-        result->vec128 = (vec_float4){_x, _y, _z, 0.0f};
-    } else {
-        float *pf = (float *)&result->vec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-    }
-}
-
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
-{
-    result->vec128 = _vmathVfSplatScalar(scalar);
-}
-
-static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
-{
-    return pnt->vec128;
-}
-
-static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
-    dstVec = vec_sel(pnt->vec128, dstVec, mask);
-    *quad = dstVec;
-}
-
-static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = vec_sld( xyzx, yzxy, 12 );
-    xyz2 = vec_sld( yzxy, zxyz, 8 );
-    xyz3 = vec_sld( zxyz, zxyz, 4 );
-    pnt0->vec128 = xyzx;
-    pnt1->vec128 = xyz1;
-    pnt2->vec128 = xyz2;
-    pnt3->vec128 = xyz3;
-}
-
-static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = vec_perm( pnt0->vec128, pnt1->vec128, _VECTORMATH_PERM_XYZA );
-    yzxy = vec_perm( pnt1->vec128, pnt2->vec128, _VECTORMATH_PERM_YZAB );
-    zxyz = vec_perm( pnt2->vec128, pnt3->vec128, _VECTORMATH_PERM_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathP3SetX( VmathPoint3 *result, float _x )
-{
-    _vmathVfSetElement(result->vec128, _x, 0);
-}
-
-static inline float vmathP3GetX( const VmathPoint3 *pnt )
-{
-    return _vmathVfGetElement(pnt->vec128, 0);
-}
-
-static inline void vmathP3SetY( VmathPoint3 *result, float _y )
-{
-    _vmathVfSetElement(result->vec128, _y, 1);
-}
-
-static inline float vmathP3GetY( const VmathPoint3 *pnt )
-{
-    return _vmathVfGetElement(pnt->vec128, 1);
-}
-
-static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
-{
-    _vmathVfSetElement(result->vec128, _z, 2);
-}
-
-static inline float vmathP3GetZ( const VmathPoint3 *pnt )
-{
-    return _vmathVfGetElement(pnt->vec128, 2);
-}
-
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
-{
-    _vmathVfSetElement(result->vec128, value, idx);
-}
-
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
-{
-    return _vmathVfGetElement(pnt->vec128, idx);
-}
-
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = vec_sub( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_add( pnt->vec128, vec1->vec128 );
-}
-
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_sub( pnt->vec128, vec1->vec128 );
-}
-
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = vec_madd( pnt0->vec128, pnt1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = recipf4( pnt->vec128 );
-}
-
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = sqrtf4( pnt->vec128 );
-}
-
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = rsqrtf4( pnt->vec128 );
-}
-
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = fabsf4( pnt->vec128 );
-}
-
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
-    result = fmaxf4( vec_splat( pnt->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline float vmathP3MinElem( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
-    result = fminf4( vec_splat( pnt->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathP3Sum( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
-    result = vec_add( vec_splat( pnt->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
-{
-    vec_float4 result = _vmathVfDot3( pnt->vec128, unitVec->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
-{
-    unsigned int tmp;
-    tmp = (unsigned int)-(select1 > 0);
-    result->vec128 = vec_sel( pnt0->vec128, pnt1->vec128, _vmathVuiSplatScalar(tmp) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print( const VmathPoint3 *pnt )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt->vec128;
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt->vec128;
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_C_H
+#define _VECTORMATH_VEC_AOS_C_H
+#include <altivec.h>
+#include <simdmath.h>
+#include <stddef.h>
+#include "vec_types.h"
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for permutes words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 result;
+    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
+    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
+    return vec_madd( vec_sld( vec0, vec0, 8 ), vec_sld( vec1, vec1, 8 ), result );
+}
+
+static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 result;
+    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
+    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
+    return vec_add( vec_sld( result, result, 8 ), result );
+}
+
+static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
+    tmp0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_YZXW );
+    tmp1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( vec0, vec0, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( vec1, vec1, _VECTORMATH_PERM_YZXW );
+    result = vec_madd( tmp0, tmp1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
+    result = vec_nmsub( tmp2, tmp3, result );
+    return result;
+}
+
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
+{
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = (vec_uint4){0x00007c00u,0x00007c00u,0x00007c00u,0x00007c00u};
+    const vec_uint4 mergeMant = (vec_uint4){0x000003ffu,0x000003ffu,0x000003ffu,0x000003ffu};
+    const vec_uint4 mergeSign = (vec_uint4){0x00008000u,0x00008000u,0x00008000u,0x00008000u};
+
+    sign = vec_sr((vec_uint4)v, (vec_uint4){16,16,16,16});
+    mant = vec_sr((vec_uint4)v, (vec_uint4){13,13,13,13});
+    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4){23,23,23,23}), (vec_int4){0xff,0xff,0xff,0xff});
+
+    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4){112,112,112,112});
+    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4){142,142,142,142});
+
+    bexp = vec_add(bexp, (vec_int4){-112,-112,-112,-112});
+    bexp = vec_sl(bexp, (vec_uint4){10,10,10,10});
+
+    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
+    hfloat = vec_sel((vec_uint4){0,0,0,0}, hfloat, notZero);
+    hfloat = vec_sel(hfloat, hfloatInf, isInf);
+    hfloat = vec_sel(hfloat, sign, mergeSign);
+
+    return hfloat;
+}
+
+static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
+{
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
+}
+
+#ifndef __GNUC__
+#define __builtin_constant_p(x) 0
+#endif
+
+static inline vec_float4 _vmathVfInsert(vec_float4 dst, vec_float4 src, int slot)
+{
+#ifdef __GNUC__
+    if (__builtin_constant_p(slot)) {
+        dst = vec_sld(dst, dst, slot<<2);
+        dst = vec_sld(dst, src, 4);
+        if (slot != 3) dst = vec_sld(dst, dst, (3-slot)<<2);
+        return dst;
+    } else
+#endif
+    {
+        vec_uchar16 shiftpattern = vec_lvsr( 0, (float *)(size_t)(slot<<2) );
+        vec_uint4 selectmask = (vec_uint4)vec_perm( (vec_uint4){0,0,0,0}, _VECTORMATH_MASK_0xF000, shiftpattern );
+        return vec_sel( dst, src, selectmask );
+    }
+}
+
+#define _vmathVfGetElement(vec, slot) ((float *)&(vec))[slot]
+#ifdef _VECTORMATH_SET_CONSTS_IN_MEM
+#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
+#else
+#define _vmathVfSetElement(vec, scalar, slot)                                            \
+{                                                                                        \
+    if (__builtin_constant_p(scalar)) {                                                  \
+        (vec) = _vmathVfInsert(vec, (vec_float4){scalar, scalar, scalar, scalar}, slot); \
+    } else {                                                                             \
+        ((float *)&(vec))[slot] = scalar;                                                \
+    }                                                                                    \
+}
+#endif
+
+static inline vec_float4 _vmathVfSplatScalar(float scalar)
+{
+    vec_float4 result;
+    if (__builtin_constant_p(scalar)) {
+        result = (vec_float4){scalar, scalar, scalar, scalar};
+    } else {
+        result = vec_ld(0, &scalar);
+        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
+    } 
+    return result;
+}
+
+static inline vec_uint4 _vmathVuiSplatScalar(unsigned int scalar)
+{
+    vec_uint4 result;
+    if (__builtin_constant_p(scalar)) {
+        result = (vec_uint4){scalar, scalar, scalar, scalar};
+    } else {
+        result = vec_ld(0, &scalar);
+        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
+    } 
+    return result;
+}
+
+#endif
+
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
+        result->vec128 = (vec_float4){_x, _y, _z, 0.0f};
+    } else {
+        float *pf = (float *)&result->vec128;
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+    }
+}
+
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = pnt->vec128;
+}
+
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
+{
+    result->vec128 = _vmathVfSplatScalar(scalar);
+}
+
+static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
+{
+    result->vec128 = vf4;
+}
+
+static inline void vmathV3MakeXAxis( VmathVector3 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_1000;
+}
+
+static inline void vmathV3MakeYAxis( VmathVector3 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0100;
+}
+
+static inline void vmathV3MakeZAxis( VmathVector3 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0010;
+}
+
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    VmathVector3 tmpV3_0, tmpV3_1;
+    vmathV3Sub( &tmpV3_0, vec1, vec0 );
+    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathV3Add( result, vec0, &tmpV3_1 );
+}
+
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
+    cosAngle = vec_splat( cosAngle, 0 );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = _vmathVfSplatScalar(t);
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( angles, oneMinusT );
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) );
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    result->vec128 = vec_madd( unitVec0->vec128, scale0, vec_madd( unitVec1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
+{
+    return vec->vec128;
+}
+
+static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
+{
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
+    dstVec = vec_sel(vec->vec128, dstVec, mask);
+    *quad = dstVec;
+}
+
+static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyz1 = vec_sld( xyzx, yzxy, 12 );
+    xyz2 = vec_sld( yzxy, zxyz, 8 );
+    xyz3 = vec_sld( zxyz, zxyz, 4 );
+    vec0->vec128 = xyzx;
+    vec1->vec128 = xyz1;
+    vec2->vec128 = xyz2;
+    vec3->vec128 = xyz3;
+}
+
+static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = vec_perm( vec0->vec128, vec1->vec128, _VECTORMATH_PERM_XYZA );
+    yzxy = vec_perm( vec1->vec128, vec2->vec128, _VECTORMATH_PERM_YZAB );
+    zxyz = vec_perm( vec2->vec128, vec3->vec128, _VECTORMATH_PERM_ZABC );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
+    vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathV3SetX( VmathVector3 *result, float _x )
+{
+    _vmathVfSetElement(result->vec128, _x, 0);
+}
+
+static inline float vmathV3GetX( const VmathVector3 *vec )
+{
+    return _vmathVfGetElement(vec->vec128, 0);
+}
+
+static inline void vmathV3SetY( VmathVector3 *result, float _y )
+{
+    _vmathVfSetElement(result->vec128, _y, 1);
+}
+
+static inline float vmathV3GetY( const VmathVector3 *vec )
+{
+    return _vmathVfGetElement(vec->vec128, 1);
+}
+
+static inline void vmathV3SetZ( VmathVector3 *result, float _z )
+{
+    _vmathVfSetElement(result->vec128, _z, 2);
+}
+
+static inline float vmathV3GetZ( const VmathVector3 *vec )
+{
+    return _vmathVfGetElement(vec->vec128, 2);
+}
+
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
+{
+    _vmathVfSetElement(result->vec128, value, idx);
+}
+
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
+{
+    return _vmathVfGetElement(vec->vec128, idx);
+}
+
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = vec_add( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = vec_sub( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
+{
+    result->vec128 = vec_add( vec->vec128, pnt1->vec128 );
+}
+
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{
+    result->vec128 = vec_madd( vec->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{
+    result->vec128 = divf4( vec->vec128, _vmathVfSplatScalar(scalar) );
+}
+
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = negatef4( vec->vec128 );
+}
+
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = vec_madd( vec0->vec128, vec1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = recipf4( vec->vec128 );
+}
+
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = sqrtf4( vec->vec128 );
+}
+
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = rsqrtf4( vec->vec128 );
+}
+
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = fabsf4( vec->vec128 );
+}
+
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV3MaxElem( const VmathVector3 *vec )
+{
+    vec_float4 result;
+    result = fmaxf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
+    result = fmaxf4( vec_splat( vec->vec128, 2 ), result );
+    return _vmathVfGetElement(result, 0);
+}
+
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV3MinElem( const VmathVector3 *vec )
+{
+    vec_float4 result;
+    result = fminf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
+    result = fminf4( vec_splat( vec->vec128, 2 ), result );
+    return _vmathVfGetElement(result, 0);
+}
+
+static inline float vmathV3Sum( const VmathVector3 *vec )
+{
+    vec_float4 result;
+    result = vec_add( vec_splat( vec->vec128, 1 ), vec->vec128 );
+    result = vec_add( vec_splat( vec->vec128, 2 ), result );
+    return _vmathVfGetElement(result, 0);
+}
+
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    vec_float4 result = _vmathVfDot3( vec0->vec128, vec1->vec128 );
+    return _vmathVfGetElement(result, 0);
+}
+
+static inline float vmathV3LengthSqr( const VmathVector3 *vec )
+{
+    vec_float4 result = _vmathVfDot3( vec->vec128, vec->vec128 );
+    return _vmathVfGetElement(result, 0);
+}
+
+static inline float vmathV3Length( const VmathVector3 *vec )
+{
+    return sqrtf( vmathV3LengthSqr( vec ) );
+}
+
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
+{
+    vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
+    dot = vec_splat( dot, 0 );
+    result->vec128 = vec_madd( vec->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
+{
+    unsigned int tmp;
+    tmp = (unsigned int)-(select1 > 0);
+    result->vec128 = vec_sel( vec0->vec128, vec1->vec128, _vmathVuiSplatScalar(tmp) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print( const VmathVector3 *vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec->vec128;
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec->vec128;
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
+        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
+        result->vec128 = (vec_float4){_x, _y, _z, _w};
+    } else {
+        float *pf = (float *)&result->vec128;
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+        pf[3] = _w;
+    }
+}
+
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
+{
+    result->vec128 = xyz->vec128;
+    _vmathVfSetElement(result->vec128, _w, 3);
+}
+
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
+{
+    result->vec128 = vec->vec128;
+    result->vec128 = _vmathVfInsert(result->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), 3);
+}
+
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = pnt->vec128;
+    result->vec128 = _vmathVfInsert(result->vec128, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), 3);
+}
+
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
+{
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
+{
+    result->vec128 = _vmathVfSplatScalar(scalar);
+}
+
+static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
+{
+    result->vec128 = vf4;
+}
+
+static inline void vmathV4MakeXAxis( VmathVector4 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_1000;
+}
+
+static inline void vmathV4MakeYAxis( VmathVector4 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0100;
+}
+
+static inline void vmathV4MakeZAxis( VmathVector4 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0010;
+}
+
+static inline void vmathV4MakeWAxis( VmathVector4 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0001;
+}
+
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    VmathVector4 tmpV4_0, tmpV4_1;
+    vmathV4Sub( &tmpV4_0, vec1, vec0 );
+    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
+    vmathV4Add( result, vec0, &tmpV4_1 );
+}
+
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
+{   /* Spherical interpolation by t; inputs are named unit vectors (presumably unit length -- not checked here). */
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );  /* dot product of the inputs (helper defined elsewhere) */
+    cosAngle = vec_splat( cosAngle, 0 );  /* broadcast lane 0 to every lane */
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );  /* all-ones where _VECTORMATH_SLERP_TOL > cosAngle */
+    angle = acosf4( cosAngle );
+    tttt = _vmathVfSplatScalar(t);  /* t in every lane, per the helper's name */
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );  /* { 1, t, 1, t } */
+    angles = vec_mergeh( angles, oneMinusT );  /* { 1, 1-t, t, 1-t } */
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  /* { angle, (1-t)*angle, t*angle, (1-t)*angle } */
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) );  /* divide every sine by sin(angle) from lane 0 */
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );  /* mask set: sin((1-t)*angle)/sin(angle); else plain 1-t */
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );  /* mask set: sin(t*angle)/sin(angle); else plain t */
+    result->vec128 = vec_madd( unitVec0->vec128, scale0, vec_madd( unitVec1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  /* unitVec0*scale0 + unitVec1*scale1 */
+}
+
+static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
+{   /* Return the underlying 128-bit vector of vec by value. */
+    return vec->vec128;
+}
+
+static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
+{   /* Pack four vectors into two output quads via _vmath2VfToHalfFloats (defined elsewhere), two vectors per quad. */
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);  /* vec0 and vec1 */
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);  /* vec2 and vec3 */
+}
+
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
+{   /* vec_sel keeps result's bits where _VECTORMATH_MASK_0x000F is set and takes vec's bits elsewhere. */
+    result->vec128 = vec_sel( vec->vec128, result->vec128, _VECTORMATH_MASK_0x000F );
+}
+
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
+{   /* Copy the whole 128-bit register; the fourth word is carried along, not cleared. */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV4SetX( VmathVector4 *result, float _x )
+{   /* Write _x into element 0 of result->vec128 through _vmathVfSetElement (defined elsewhere). */
+    _vmathVfSetElement(result->vec128, _x, 0);
+}
+
+static inline float vmathV4GetX( const VmathVector4 *vec )
+{   /* Read element 0 of vec->vec128 through _vmathVfGetElement (defined elsewhere). */
+    return _vmathVfGetElement(vec->vec128, 0);
+}
+
+static inline void vmathV4SetY( VmathVector4 *result, float _y )
+{   /* Write _y into element 1 of result->vec128 through _vmathVfSetElement (defined elsewhere). */
+    _vmathVfSetElement(result->vec128, _y, 1);
+}
+
+static inline float vmathV4GetY( const VmathVector4 *vec )
+{   /* Read element 1 of vec->vec128 through _vmathVfGetElement (defined elsewhere). */
+    return _vmathVfGetElement(vec->vec128, 1);
+}
+
+static inline void vmathV4SetZ( VmathVector4 *result, float _z )
+{   /* Write _z into element 2 of result->vec128 through _vmathVfSetElement (defined elsewhere). */
+    _vmathVfSetElement(result->vec128, _z, 2);
+}
+
+static inline float vmathV4GetZ( const VmathVector4 *vec )
+{   /* Read element 2 of vec->vec128 through _vmathVfGetElement (defined elsewhere). */
+    return _vmathVfGetElement(vec->vec128, 2);
+}
+
+static inline void vmathV4SetW( VmathVector4 *result, float _w )
+{   /* Write _w into element 3 of result->vec128 through _vmathVfSetElement (defined elsewhere). */
+    _vmathVfSetElement(result->vec128, _w, 3);
+}
+
+static inline float vmathV4GetW( const VmathVector4 *vec )
+{   /* Read element 3 of vec->vec128 through _vmathVfGetElement (defined elsewhere). */
+    return _vmathVfGetElement(vec->vec128, 3);
+}
+
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
+{   /* Write value into element idx; idx is forwarded to _vmathVfSetElement without a range check here. */
+    _vmathVfSetElement(result->vec128, value, idx);
+}
+
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
+{   /* Read element idx; idx is forwarded to _vmathVfGetElement without a range check here. */
+    return _vmathVfGetElement(vec->vec128, idx);
+}
+
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Per-element sum vec0 + vec1 using vec_add. */
+    result->vec128 = vec_add( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Per-element difference vec0 - vec1 using vec_sub. */
+    result->vec128 = vec_sub( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{   /* Multiply vec by the splatted scalar; vec_madd is used with a zero addend. */
+    result->vec128 = vec_madd( vec->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{   /* Divide vec by the splatted scalar using divf4; a zero scalar is not special-cased here. */
+    result->vec128 = divf4( vec->vec128, _vmathVfSplatScalar(scalar) );
+}
+
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Negate vec using negatef4. */
+    result->vec128 = negatef4( vec->vec128 );
+}
+
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Per-element product vec0 * vec1; vec_madd is used with a zero addend. */
+    result->vec128 = vec_madd( vec0->vec128, vec1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Per-element quotient vec0 / vec1 using divf4. */
+    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Apply recipf4 to vec and store the result. */
+    result->vec128 = recipf4( vec->vec128 );
+}
+
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Apply sqrtf4 to vec and store the result. */
+    result->vec128 = sqrtf4( vec->vec128 );
+}
+
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Apply rsqrtf4 to vec and store the result. */
+    result->vec128 = rsqrtf4( vec->vec128 );
+}
+
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Apply fabsf4 to vec and store the result. */
+    result->vec128 = fabsf4( vec->vec128 );
+}
+
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* copysignf4( vec0, vec1 ): presumably magnitude from vec0 and sign from vec1, as with C copysignf -- confirm. */
+    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Combine vec0 and vec1 with fmaxf4 and store the result. */
+    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV4MaxElem( const VmathVector4 *vec )
+{
+    /* Fold y, z and w into lane 0 with fmaxf4; only lane 0 is read back. */
+    vec_float4 best = fmaxf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
+    best = fmaxf4( vec_splat( vec->vec128, 2 ), best );
+    best = fmaxf4( vec_splat( vec->vec128, 3 ), best );
+    return _vmathVfGetElement(best, 0);
+}
+
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Combine vec0 and vec1 with fminf4 and store the result. */
+    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV4MinElem( const VmathVector4 *vec )
+{
+    /* Fold y, z and w into lane 0 with fminf4; only lane 0 is read back. */
+    vec_float4 least = fminf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
+    least = fminf4( vec_splat( vec->vec128, 2 ), least );
+    least = fminf4( vec_splat( vec->vec128, 3 ), least );
+    return _vmathVfGetElement(least, 0);
+}
+
+static inline float vmathV4Sum( const VmathVector4 *vec )
+{
+    /* Add y, then z, then w onto the vector; lane 0 ends up holding x + y + z + w. */
+    vec_float4 total = vec_add( vec_splat( vec->vec128, 1 ), vec->vec128 );
+    total = vec_add( vec_splat( vec->vec128, 2 ), total );
+    total = vec_add( vec_splat( vec->vec128, 3 ), total );
+    return _vmathVfGetElement(total, 0);
+}
+
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Compute _vmathVfDot4 of the two vectors and return lane 0 of its result. */
+    vec_float4 dotLanes = _vmathVfDot4( vec0->vec128, vec1->vec128 );
+    return _vmathVfGetElement(dotLanes, 0);
+}
+
+static inline float vmathV4LengthSqr( const VmathVector4 *vec )
+{   /* Squared length: _vmathVfDot4 of the vector with itself, read from lane 0. */
+    vec_float4 selfDot = _vmathVfDot4( vec->vec128, vec->vec128 );
+    return _vmathVfGetElement(selfDot, 0);
+}
+
+static inline float vmathV4Length( const VmathVector4 *vec )
+{   /* Length as the scalar sqrtf of vmathV4LengthSqr( vec ). */
+    return sqrtf( vmathV4LengthSqr( vec ) );
+}
+
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Scale vec by rsqrtf4 of its self dot product; a zero-length input is not special-cased. */
+    vec_float4 invLen = rsqrtf4( _vmathVfDot4( vec->vec128, vec->vec128 ) );
+    result->vec128 = vec_madd( vec->vec128, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
+{
+    /* All-ones mask when select1 is non-zero (vec_sel then yields vec1), all-zero otherwise (yields vec0). */
+    unsigned int laneMask = ( select1 > 0 ) ? ~0u : 0u;
+    result->vec128 = vec_sel( vec0->vec128, vec1->vec128, _vmathVuiSplatScalar(laneMask) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print( const VmathVector4 *vec )
+{   /* Debug helper: view the 128-bit value as four floats through a union and print them. */
+    union { vec_float4 v; float s[4]; } lanes;
+    lanes.v = vec->vec128;
+    printf( "( %f %f %f %f )\n", lanes.s[0], lanes.s[1], lanes.s[2], lanes.s[3] );
+}
+
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
+{   /* Debug helper: same as vmathV4Print but prefixes the output with "name: ". */
+    union { vec_float4 v; float s[4]; } lanes;
+    lanes.v = vec->vec128;
+    printf( "%s: ( %f %f %f %f )\n", name, lanes.s[0], lanes.s[1], lanes.s[2], lanes.s[3] );
+}
+
+#endif
+
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Copy the 128-bit value of pnt into result. */
+    result->vec128 = pnt->vec128;
+}
+
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
+{
+    if (!(__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z))) {   /* run-time values */
+        float *lanes = (float *)&result->vec128;   /* write x, y, z through a float view; the fourth word is not touched */
+        lanes[0] = _x;
+        lanes[1] = _y;
+        lanes[2] = _z;
+        return;
+    }
+    result->vec128 = (vec_float4){_x, _y, _z, 0.0f};   /* all three are compile-time constants: build a literal, fourth word 0 */
+}
+
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
+{   /* Copy the 128-bit value of the vector into the point unchanged. */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
+{   /* Fill the point from _vmathVfSplatScalar( scalar ) (helper defined elsewhere). */
+    result->vec128 = _vmathVfSplatScalar(scalar);
+}
+
+static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
+{   /* Store the raw 128-bit vector vf4 into result->vec128 unchanged. */
+    result->vec128 = vf4;
+}
+
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Linear interpolation: result = pnt0 + t * (pnt1 - pnt0). */
+    VmathVector3 offset, scaledOffset;
+    vmathP3Sub( &offset, pnt1, pnt0 );                  /* offset = pnt1 - pnt0 */
+    vmathV3ScalarMul( &scaledOffset, &offset, t );      /* scaledOffset = t * offset */
+    vmathP3AddV3( result, pnt0, &scaledOffset );        /* result = pnt0 + scaledOffset */
+}
+
+static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
+{   /* Return the underlying 128-bit vector of pnt by value. */
+    return pnt->vec128;
+}
+
+static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
+{
+    /* vec_sel keeps the old *quad bits where _VECTORMATH_MASK_0x000F is set and takes the point's bits elsewhere. */
+    vec_uint4 keepMask = _VECTORMATH_MASK_0x000F;
+    vec_float4 previous = *quad;
+    *quad = vec_sel(pnt->vec128, previous, keepMask);
+}
+
+static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
+{
+    /* Read the three packed quads first, realign points 1..3 with vec_sld (12, 8, 4 bytes), then store. */
+    vec_float4 quad0 = threeQuads[0];
+    vec_float4 quad1 = threeQuads[1];
+    vec_float4 quad2 = threeQuads[2];
+    vec_float4 second = vec_sld( quad0, quad1, 12 );
+    vec_float4 third = vec_sld( quad1, quad2, 8 );
+    vec_float4 fourth = vec_sld( quad2, quad2, 4 );
+    pnt0->vec128 = quad0;
+    pnt1->vec128 = second;
+    pnt2->vec128 = third;
+    pnt3->vec128 = fourth;
+}
+
+static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
+{
+    /* Build all three output quads with vec_perm on neighbouring points before writing any of them. */
+    vec_float4 quad0 = vec_perm( pnt0->vec128, pnt1->vec128, _VECTORMATH_PERM_XYZA );
+    vec_float4 quad1 = vec_perm( pnt1->vec128, pnt2->vec128, _VECTORMATH_PERM_YZAB );
+    vec_float4 quad2 = vec_perm( pnt2->vec128, pnt3->vec128, _VECTORMATH_PERM_ZABC );
+    threeQuads[0] = quad0;
+    threeQuads[1] = quad1;
+    threeQuads[2] = quad2;
+}
+
+static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
+{
+    /* Pack each group of four points into three float quads, then convert the six quads pairwise. */
+    vec_float4 firstFour[3], lastFour[3];
+    vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, firstFour );
+    vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, lastFour );
+    threeQuads[0] = _vmath2VfToHalfFloats(firstFour[0], firstFour[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(firstFour[2], lastFour[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(lastFour[1], lastFour[2]);
+}
+
+static inline void vmathP3SetX( VmathPoint3 *result, float _x )
+{   /* Write _x into element 0 of result->vec128 through _vmathVfSetElement (defined elsewhere). */
+    _vmathVfSetElement(result->vec128, _x, 0);
+}
+
+static inline float vmathP3GetX( const VmathPoint3 *pnt )
+{   /* Read element 0 of pnt->vec128 through _vmathVfGetElement (defined elsewhere). */
+    return _vmathVfGetElement(pnt->vec128, 0);
+}
+
+static inline void vmathP3SetY( VmathPoint3 *result, float _y )
+{   /* Write _y into element 1 of result->vec128 through _vmathVfSetElement (defined elsewhere). */
+    _vmathVfSetElement(result->vec128, _y, 1);
+}
+
+static inline float vmathP3GetY( const VmathPoint3 *pnt )
+{   /* Read element 1 of pnt->vec128 through _vmathVfGetElement (defined elsewhere). */
+    return _vmathVfGetElement(pnt->vec128, 1);
+}
+
+static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
+{   /* Write _z into element 2 of result->vec128 through _vmathVfSetElement (defined elsewhere). */
+    _vmathVfSetElement(result->vec128, _z, 2);
+}
+
+static inline float vmathP3GetZ( const VmathPoint3 *pnt )
+{   /* Read element 2 of pnt->vec128 through _vmathVfGetElement (defined elsewhere). */
+    return _vmathVfGetElement(pnt->vec128, 2);
+}
+
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
+{   /* Write value into element idx; idx is forwarded to _vmathVfSetElement without a range check here. */
+    _vmathVfSetElement(result->vec128, value, idx);
+}
+
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
+{   /* Read element idx; idx is forwarded to _vmathVfGetElement without a range check here. */
+    return _vmathVfGetElement(pnt->vec128, idx);
+}
+
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Difference of two points, pnt0 - pnt1, stored as a VmathVector3. */
+    result->vec128 = vec_sub( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{   /* Point plus vector, pnt + vec1, using vec_add. */
+    result->vec128 = vec_add( pnt->vec128, vec1->vec128 );
+}
+
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{   /* Point minus vector, pnt - vec1, using vec_sub. */
+    result->vec128 = vec_sub( pnt->vec128, vec1->vec128 );
+}
+
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Per-element product pnt0 * pnt1; vec_madd is used with a zero addend. */
+    result->vec128 = vec_madd( pnt0->vec128, pnt1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Per-element quotient pnt0 / pnt1 using divf4. */
+    result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Apply recipf4 to pnt and store the result. */
+    result->vec128 = recipf4( pnt->vec128 );
+}
+
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Apply sqrtf4 to pnt and store the result. */
+    result->vec128 = sqrtf4( pnt->vec128 );
+}
+
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Apply rsqrtf4 to pnt and store the result. */
+    result->vec128 = rsqrtf4( pnt->vec128 );
+}
+
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Apply fabsf4 to pnt and store the result. */
+    result->vec128 = fabsf4( pnt->vec128 );
+}
+
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* copysignf4( pnt0, pnt1 ): presumably magnitude from pnt0 and sign from pnt1, as with C copysignf -- confirm. */
+    result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Combine pnt0 and pnt1 with fmaxf4 and store the result. */
+    result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
+{
+    /* Fold y and z into lane 0 with fmaxf4; the fourth word is never splatted in, and only lane 0 is read back. */
+    vec_float4 best = fmaxf4( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
+    best = fmaxf4( vec_splat( pnt->vec128, 2 ), best );
+    return _vmathVfGetElement(best, 0);
+}
+
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Combine pnt0 and pnt1 with fminf4 and store the result. */
+    result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline float vmathP3MinElem( const VmathPoint3 *pnt )
+{
+    /* Fold y and z into lane 0 with fminf4; the fourth word is never splatted in, and only lane 0 is read back. */
+    vec_float4 least = fminf4( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
+    least = fminf4( vec_splat( pnt->vec128, 2 ), least );
+    return _vmathVfGetElement(least, 0);
+}
+
+static inline float vmathP3Sum( const VmathPoint3 *pnt )
+{
+    /* Add y, then z onto the vector; lane 0 ends up holding x + y + z. */
+    vec_float4 total = vec_add( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
+    total = vec_add( vec_splat( pnt->vec128, 2 ), total );
+    return _vmathVfGetElement(total, 0);
+}
+
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
+{   /* Uniform scale: build a point from scaleVal, then multiply per element. */
+    VmathPoint3 factors;
+    vmathP3MakeFromScalar( &factors, scaleVal );
+    vmathP3MulPerElem( result, pnt, &factors );
+}
+
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
+{   /* Per-axis scale: copy scaleVec into a point, then multiply per element. */
+    VmathPoint3 factors;
+    vmathP3MakeFromV3( &factors, scaleVec );
+    vmathP3MulPerElem( result, pnt, &factors );
+}
+
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
+{   /* Compute _vmathVfDot3 of the point and unitVec and return lane 0 of its result. */
+    vec_float4 dotLanes = _vmathVfDot3( pnt->vec128, unitVec->vec128 );
+    return _vmathVfGetElement(dotLanes, 0);
+}
+
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
+{   /* Convert the point with vmathV3MakeFromP3 and return that vector's squared length. */
+    VmathVector3 asVector;
+    vmathV3MakeFromP3( &asVector, pnt );
+    return vmathV3LengthSqr( &asVector );
+}
+
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
+{   /* Convert the point with vmathV3MakeFromP3 and return that vector's length. */
+    VmathVector3 asVector;
+    vmathV3MakeFromP3( &asVector, pnt );
+    return vmathV3Length( &asVector );
+}
+
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Squared length of the offset pnt1 - pnt0. */
+    VmathVector3 offset;
+    vmathP3Sub( &offset, pnt1, pnt0 );
+    return vmathV3LengthSqr( &offset );
+}
+
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Length of the offset pnt1 - pnt0. */
+    VmathVector3 offset;
+    vmathP3Sub( &offset, pnt1, pnt0 );
+    return vmathV3Length( &offset );
+}
+
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
+{
+    /* All-ones mask when select1 is non-zero (vec_sel then yields pnt1), all-zero otherwise (yields pnt0). */
+    unsigned int laneMask = ( select1 > 0 ) ? ~0u : 0u;
+    result->vec128 = vec_sel( pnt0->vec128, pnt1->vec128, _vmathVuiSplatScalar(laneMask) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print( const VmathPoint3 *pnt )
+{   /* Debug helper: view the 128-bit value as floats through a union and print the first three. */
+    union { vec_float4 v; float s[4]; } lanes;
+    lanes.v = pnt->vec128;
+    printf( "( %f %f %f )\n", lanes.s[0], lanes.s[1], lanes.s[2] );
+}
+
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
+{   /* Debug helper: same as vmathP3Print but prefixes the output with "name: ". */
+    union { vec_float4 v; float s[4]; } lanes;
+    lanes.v = pnt->vec128;
+    printf( "%s: ( %f %f %f )\n", name, lanes.s[0], lanes.s[1], lanes.s[2] );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos_v.h
index 90ee6b98c..6dcc79c72 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos_v.h
@@ -1,953 +1,953 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_V_C_H
-#define _VECTORMATH_VEC_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for permutes words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
-#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
-#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
-#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
-#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
-#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
-#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
-#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathVector3 result;
-    vmathV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector3 result;
-    vmathV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
-{
-    VmathVector3 result;
-    vmathV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathVector3 result;
-    vmathV3MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeXAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeYAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeZAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathVector3 result;
-    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline vec_float4 vmathV3Get128_V( VmathVector3 vec )
-{
-    return vmathV3Get128(&vec);
-}
-
-static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad )
-{
-    vmathV3StoreXYZ(&vec, quad);
-}
-
-static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
-{
-    vmathV3LoadXYZArray(vec0, vec1, vec2, vec3, threeQuads);
-}
-
-static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads )
-{
-    vmathV3StoreXYZArray(&vec0, &vec1, &vec2, &vec3, threeQuads);
-}
-
-static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads )
-{
-    vmathV3StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, &vec4, &vec5, &vec6, &vec7, threeQuads);
-}
-
-static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
-{
-    vmathV3SetX(result, _x);
-}
-
-static inline float vmathV3GetX_V( VmathVector3 vec )
-{
-    return vmathV3GetX(&vec);
-}
-
-static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
-{
-    vmathV3SetY(result, _y);
-}
-
-static inline float vmathV3GetY_V( VmathVector3 vec )
-{
-    return vmathV3GetY(&vec);
-}
-
-static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
-{
-    vmathV3SetZ(result, _z);
-}
-
-static inline float vmathV3GetZ_V( VmathVector3 vec )
-{
-    return vmathV3GetZ(&vec);
-}
-
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
-{
-    vmathV3SetElem(result, idx, value);
-}
-
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
-{
-    return vmathV3GetElem(&vec, idx);
-}
-
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MaxElem_V( VmathVector3 vec )
-{
-    return vmathV3MaxElem(&vec);
-}
-
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MinElem_V( VmathVector3 vec )
-{
-    return vmathV3MinElem(&vec);
-}
-
-static inline float vmathV3Sum_V( VmathVector3 vec )
-{
-    return vmathV3Sum(&vec);
-}
-
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    return vmathV3Dot(&vec0, &vec1);
-}
-
-static inline float vmathV3LengthSqr_V( VmathVector3 vec )
-{
-    return vmathV3LengthSqr(&vec);
-}
-
-static inline float vmathV3Length_V( VmathVector3 vec )
-{
-    return vmathV3Length(&vec);
-}
-
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
-{
-    VmathVector3 result;
-    vmathV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print_V( VmathVector3 vec )
-{
-    vmathV3Print(&vec);
-}
-
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
-{
-    vmathV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
-{
-    VmathVector4 result;
-    vmathV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
-{
-    VmathVector4 result;
-    vmathV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathVector4 result;
-    vmathV4MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeXAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeYAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeZAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeWAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
-{
-    VmathVector4 result;
-    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline vec_float4 vmathV4Get128_V( VmathVector4 vec )
-{
-    return vmathV4Get128(&vec);
-}
-
-static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads )
-{
-    vmathV4StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, twoQuads);
-}
-
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
-{
-    vmathV4SetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
-{
-    VmathVector3 result;
-    vmathV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
-{
-    vmathV4SetX(result, _x);
-}
-
-static inline float vmathV4GetX_V( VmathVector4 vec )
-{
-    return vmathV4GetX(&vec);
-}
-
-static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
-{
-    vmathV4SetY(result, _y);
-}
-
-static inline float vmathV4GetY_V( VmathVector4 vec )
-{
-    return vmathV4GetY(&vec);
-}
-
-static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
-{
-    vmathV4SetZ(result, _z);
-}
-
-static inline float vmathV4GetZ_V( VmathVector4 vec )
-{
-    return vmathV4GetZ(&vec);
-}
-
-static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
-{
-    vmathV4SetW(result, _w);
-}
-
-static inline float vmathV4GetW_V( VmathVector4 vec )
-{
-    return vmathV4GetW(&vec);
-}
-
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
-{
-    vmathV4SetElem(result, idx, value);
-}
-
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
-{
-    return vmathV4GetElem(&vec, idx);
-}
-
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MaxElem_V( VmathVector4 vec )
-{
-    return vmathV4MaxElem(&vec);
-}
-
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MinElem_V( VmathVector4 vec )
-{
-    return vmathV4MinElem(&vec);
-}
-
-static inline float vmathV4Sum_V( VmathVector4 vec )
-{
-    return vmathV4Sum(&vec);
-}
-
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    return vmathV4Dot(&vec0, &vec1);
-}
-
-static inline float vmathV4LengthSqr_V( VmathVector4 vec )
-{
-    return vmathV4LengthSqr(&vec);
-}
-
-static inline float vmathV4Length_V( VmathVector4 vec )
-{
-    return vmathV4Length(&vec);
-}
-
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
-{
-    VmathVector4 result;
-    vmathV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print_V( VmathVector4 vec )
-{
-    vmathV4Print(&vec);
-}
-
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
-{
-    vmathV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathPoint3 result;
-    vmathP3MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt )
-{
-    return vmathP3Get128(&pnt);
-}
-
-static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad )
-{
-    vmathP3StoreXYZ(&pnt, quad);
-}
-
-static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
-{
-    vmathP3LoadXYZArray(pnt0, pnt1, pnt2, pnt3, threeQuads);
-}
-
-static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads )
-{
-    vmathP3StoreXYZArray(&pnt0, &pnt1, &pnt2, &pnt3, threeQuads);
-}
-
-static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads )
-{
-    vmathP3StoreHalfFloats(&pnt0, &pnt1, &pnt2, &pnt3, &pnt4, &pnt5, &pnt6, &pnt7, threeQuads);
-}
-
-static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
-{
-    vmathP3SetX(result, _x);
-}
-
-static inline float vmathP3GetX_V( VmathPoint3 pnt )
-{
-    return vmathP3GetX(&pnt);
-}
-
-static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
-{
-    vmathP3SetY(result, _y);
-}
-
-static inline float vmathP3GetY_V( VmathPoint3 pnt )
-{
-    return vmathP3GetY(&pnt);
-}
-
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
-{
-    vmathP3SetZ(result, _z);
-}
-
-static inline float vmathP3GetZ_V( VmathPoint3 pnt )
-{
-    return vmathP3GetZ(&pnt);
-}
-
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
-{
-    vmathP3SetElem(result, idx, value);
-}
-
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
-{
-    return vmathP3GetElem(&pnt, idx);
-}
-
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathVector3 result;
-    vmathP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MaxElem(&pnt);
-}
-
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MinElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MinElem(&pnt);
-}
-
-static inline float vmathP3Sum_V( VmathPoint3 pnt )
-{
-    return vmathP3Sum(&pnt);
-}
-
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
-{
-    VmathPoint3 result;
-    vmathP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
-{
-    VmathPoint3 result;
-    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
-{
-    return vmathP3Projection(&pnt, &unitVec);
-}
-
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistSqrFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
-{
-    VmathPoint3 result;
-    vmathP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print_V( VmathPoint3 pnt )
-{
-    vmathP3Print(&pnt);
-}
-
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
-{
-    vmathP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_V_C_H
+#define _VECTORMATH_VEC_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for permutes words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
+{
+    VmathVector3 result;
+    vmathV3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
+{
+    VmathVector3 result;
+    vmathV3MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
+{
+    VmathVector3 result;
+    vmathV3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 )
+{
+    VmathVector3 result;
+    vmathV3MakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeXAxis_V( )
+{
+    VmathVector3 result;
+    vmathV3MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeYAxis_V( )
+{
+    VmathVector3 result;
+    vmathV3MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeZAxis_V( )
+{
+    VmathVector3 result;
+    vmathV3MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{
+    VmathVector3 result;
+    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline vec_float4 vmathV3Get128_V( VmathVector3 vec )
+{
+    return vmathV3Get128(&vec);
+}
+
+static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad )
+{
+    vmathV3StoreXYZ(&vec, quad);
+}
+
+static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
+{
+    vmathV3LoadXYZArray(vec0, vec1, vec2, vec3, threeQuads);
+}
+
+static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads )
+{
+    vmathV3StoreXYZArray(&vec0, &vec1, &vec2, &vec3, threeQuads);
+}
+
+static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads )
+{
+    vmathV3StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, &vec4, &vec5, &vec6, &vec7, threeQuads);
+}
+
+static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
+{
+    vmathV3SetX(result, _x);
+}
+
+static inline float vmathV3GetX_V( VmathVector3 vec )
+{
+    return vmathV3GetX(&vec);
+}
+
+static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
+{
+    vmathV3SetY(result, _y);
+}
+
+static inline float vmathV3GetY_V( VmathVector3 vec )
+{
+    return vmathV3GetY(&vec);
+}
+
+static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
+{
+    vmathV3SetZ(result, _z);
+}
+
+static inline float vmathV3GetZ_V( VmathVector3 vec )
+{
+    return vmathV3GetZ(&vec);
+}
+
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
+{
+    vmathV3SetElem(result, idx, value);
+}
+
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
+{
+    return vmathV3GetElem(&vec, idx);
+}
+
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MaxElem_V( VmathVector3 vec )
+{
+    return vmathV3MaxElem(&vec);
+}
+
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MinElem_V( VmathVector3 vec )
+{
+    return vmathV3MinElem(&vec);
+}
+
+static inline float vmathV3Sum_V( VmathVector3 vec )
+{
+    return vmathV3Sum(&vec);
+}
+
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    return vmathV3Dot(&vec0, &vec1);
+}
+
+static inline float vmathV3LengthSqr_V( VmathVector3 vec )
+{
+    return vmathV3LengthSqr(&vec);
+}
+
+static inline float vmathV3Length_V( VmathVector3 vec )
+{
+    return vmathV3Length(&vec);
+}
+
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
+{
+    VmathVector3 result;
+    vmathV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print_V( VmathVector3 vec )
+{
+    vmathV3Print(&vec);
+}
+
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
+{
+    vmathV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
+{
+    VmathVector4 result;
+    vmathV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
+{
+    VmathVector4 result;
+    vmathV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 )
+{
+    VmathVector4 result;
+    vmathV4MakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeXAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeYAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeZAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeWAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeWAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
+{
+    VmathVector4 result;
+    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline vec_float4 vmathV4Get128_V( VmathVector4 vec )
+{
+    return vmathV4Get128(&vec);
+}
+
+static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads )
+{
+    vmathV4StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, twoQuads);
+}
+
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
+{
+    vmathV4SetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
+{
+    VmathVector3 result;
+    vmathV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
+{
+    vmathV4SetX(result, _x);
+}
+
+static inline float vmathV4GetX_V( VmathVector4 vec )
+{
+    return vmathV4GetX(&vec);
+}
+
+static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
+{
+    vmathV4SetY(result, _y);
+}
+
+static inline float vmathV4GetY_V( VmathVector4 vec )
+{
+    return vmathV4GetY(&vec);
+}
+
+static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
+{
+    vmathV4SetZ(result, _z);
+}
+
+static inline float vmathV4GetZ_V( VmathVector4 vec )
+{
+    return vmathV4GetZ(&vec);
+}
+
+static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
+{
+    vmathV4SetW(result, _w);
+}
+
+static inline float vmathV4GetW_V( VmathVector4 vec )
+{
+    return vmathV4GetW(&vec);
+}
+
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
+{
+    vmathV4SetElem(result, idx, value);
+}
+
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
+{
+    return vmathV4GetElem(&vec, idx);
+}
+
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MaxElem_V( VmathVector4 vec )
+{
+    return vmathV4MaxElem(&vec);
+}
+
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MinElem_V( VmathVector4 vec )
+{
+    return vmathV4MinElem(&vec);
+}
+
+static inline float vmathV4Sum_V( VmathVector4 vec )
+{
+    return vmathV4Sum(&vec);
+}
+
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    return vmathV4Dot(&vec0, &vec1);
+}
+
+static inline float vmathV4LengthSqr_V( VmathVector4 vec )
+{
+    return vmathV4LengthSqr(&vec);
+}
+
+static inline float vmathV4Length_V( VmathVector4 vec )
+{
+    return vmathV4Length(&vec);
+}
+
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
+{
+    VmathVector4 result;
+    vmathV4Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print_V( VmathVector4 vec )
+{
+    vmathV4Print(&vec);
+}
+
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
+{
+    vmathV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 )
+{
+    VmathPoint3 result;
+    vmathP3MakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3Lerp(&result, t, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt )
+{
+    return vmathP3Get128(&pnt);
+}
+
+static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad )
+{
+    vmathP3StoreXYZ(&pnt, quad);
+}
+
+static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
+{
+    vmathP3LoadXYZArray(pnt0, pnt1, pnt2, pnt3, threeQuads);
+}
+
+static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads )
+{
+    vmathP3StoreXYZArray(&pnt0, &pnt1, &pnt2, &pnt3, threeQuads);
+}
+
+static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads )
+{
+    vmathP3StoreHalfFloats(&pnt0, &pnt1, &pnt2, &pnt3, &pnt4, &pnt5, &pnt6, &pnt7, threeQuads);
+}
+
+static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
+{
+    vmathP3SetX(result, _x);
+}
+
+static inline float vmathP3GetX_V( VmathPoint3 pnt )
+{
+    return vmathP3GetX(&pnt);
+}
+
+static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
+{
+    vmathP3SetY(result, _y);
+}
+
+static inline float vmathP3GetY_V( VmathPoint3 pnt )
+{
+    return vmathP3GetY(&pnt);
+}
+
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
+{
+    vmathP3SetZ(result, _z);
+}
+
+static inline float vmathP3GetZ_V( VmathPoint3 pnt )
+{
+    return vmathP3GetZ(&pnt);
+}
+
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
+{
+    vmathP3SetElem(result, idx, value);
+}
+
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
+{
+    return vmathP3GetElem(&pnt, idx);
+}
+
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathVector3 result;
+    vmathP3Sub(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3AddV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3SubV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MulPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3DivPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RecipPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3SqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RsqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3AbsPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MaxElem(&pnt);
+}
+
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MinPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MinElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MinElem(&pnt);
+}
+
+static inline float vmathP3Sum_V( VmathPoint3 pnt )
+{
+    return vmathP3Sum(&pnt);
+}
+
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
+{
+    VmathPoint3 result;
+    vmathP3Scale(&result, &pnt, scaleVal);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
+{
+    VmathPoint3 result;
+    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
+    return result;
+}
+
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
+{
+    return vmathP3Projection(&pnt, &unitVec);
+}
+
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistSqrFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
+{
+    VmathPoint3 result;
+    vmathP3Select(&result, &pnt0, &pnt1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print_V( VmathPoint3 pnt )
+{
+    vmathP3Print(&pnt);
+}
+
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
+{
+    vmathP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa.h
index 6433666e6..1cda25747 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa.h
@@ -1,1223 +1,1223 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_C_H
-#define _VECTORMATH_VEC_SOA_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
-{
-    vec_float4 vec128 = vec->vec128;
-    result->x = vec_splat( vec128, 0 );
-    result->y = vec_splat( vec128, 1 );
-    result->z = vec_splat( vec128, 2 );
-}
-
-static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 );
-    tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
-    tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
-    tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
-    result->x = vec_mergeh( tmp0, tmp1 );
-    result->y = vec_mergel( tmp0, tmp1 );
-    result->z = vec_mergeh( tmp2, tmp3 );
-}
-
-static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathSoaV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
-    vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
-}
-
-static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = vec_mergeh( vec->x, vec->z );
-    tmp1 = vec_mergel( vec->x, vec->z );
-    vmathV3MakeFrom128( result0, vec_mergeh( tmp0, vec->y ) );
-    vmathV3MakeFrom128( result1, vec_perm( tmp0, vec->y, _VECTORMATH_PERM_ZBWX ) );
-    vmathV3MakeFrom128( result2, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_XCYX ) );
-    vmathV3MakeFrom128( result3, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_ZDWX ) );
-}
-
-static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = vec_sld( yzxy, xyzx, 8 );
-    zxzx = vec_sld( xyzx, zxyz, 8 );
-    yzyz = vec_sld( zxyz, yzxy, 8 );
-    vmathSoaV3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
-    vmathSoaV3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
-    vmathSoaV3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
-}
-
-static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA );
-    zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
-    yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
-    xyzx = vec_sld( xyxy, zxzx, 8 );
-    yzxy = vec_sld( yzyz, xyxy, 8 );
-    zxyz = vec_sld( zxzx, yzyz, 8 );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathSoaV3StoreXYZArray( vec0, xyz0 );
-    vmathSoaV3StoreXYZArray( vec1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_add( vec0->x, vec1->x );
-    result->y = vec_add( vec0->y, vec1->y );
-    result->z = vec_add( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_sub( vec0->x, vec1->x );
-    result->y = vec_sub( vec0->y, vec1->y );
-    result->z = vec_sub( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = vec_add( vec->x, pnt1->x );
-    result->y = vec_add( vec->y, pnt1->y );
-    result->z = vec_add( vec->z, pnt1->z );
-}
-
-static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
-{
-    result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
-{
-    result->x = divf4( vec->x, scalar );
-    result->y = divf4( vec->y, scalar );
-    result->z = divf4( vec->z, scalar );
-}
-
-static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = negatef4( vec->x );
-    result->y = negatef4( vec->y );
-    result->z = negatef4( vec->z );
-}
-
-static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = divf4( vec0->x, vec1->x );
-    result->y = divf4( vec0->y, vec1->y );
-    result->z = divf4( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
-}
-
-static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = sqrtf4( vec->x );
-    result->y = sqrtf4( vec->y );
-    result->z = sqrtf4( vec->z );
-}
-
-static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
-}
-
-static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = fabsf4( vec->x );
-    result->y = fabsf4( vec->y );
-    result->z = fabsf4( vec->z );
-}
-
-static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = copysignf4( vec0->x, vec1->x );
-    result->y = copysignf4( vec0->y, vec1->y );
-    result->z = copysignf4( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = fmaxf4( vec0->x, vec1->x );
-    result->y = fmaxf4( vec0->y, vec1->y );
-    result->z = fmaxf4( vec0->z, vec1->z );
-}
-
-static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec->x, vec->y );
-    result = fmaxf4( vec->z, result );
-    return result;
-}
-
-static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = fminf4( vec0->x, vec1->x );
-    result->y = fminf4( vec0->y, vec1->y );
-    result->z = fminf4( vec0->z, vec1->z );
-}
-
-static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec->x, vec->y );
-    result = fminf4( vec->z, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = vec_add( vec->x, vec->y );
-    result = vec_add( result, vec->z );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = vec_madd( vec->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
-{
-    return sqrtf4( vmathSoaV3LengthSqr( vec ) );
-}
-
-static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaV3LengthSqr( vec );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    result->x = vec_madd( vec->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_sub( vec_madd( vec0->y, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->z, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_sub( vec_madd( vec0->z, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->x, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_sub( vec_madd( vec0->x, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->y, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
-{
-    result->x = vec_sel( vec0->x, vec1->x, select1 );
-    result->y = vec_sel( vec0->y, vec1->y, select1 );
-    result->z = vec_sel( vec0->z, vec1->z, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
-{
-    VmathVector3 vec0, vec1, vec2, vec3;
-    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV3Print( &vec3 );
-}
-
-static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
-{
-    VmathVector3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV3Print( &vec3 );
-}
-
-#endif
-
-static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
-{
-    vmathSoaV4SetXYZ( result, xyz );
-    vmathSoaV4SetW( result, _w );
-}
-
-static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-}
-
-static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-    result->w = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
-}
-
-static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
-{
-    vec_float4 vec128 = vec->vec128;
-    result->x = vec_splat( vec128, 0 );
-    result->y = vec_splat( vec128, 1 );
-    result->z = vec_splat( vec128, 2 );
-    result->w = vec_splat( vec128, 3 );
-}
-
-static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 );
-    tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
-    tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
-    tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
-    result->x = vec_mergeh( tmp0, tmp1 );
-    result->y = vec_mergel( tmp0, tmp1 );
-    result->z = vec_mergeh( tmp2, tmp3 );
-    result->w = vec_mergel( tmp2, tmp3 );
-}
-
-static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    VmathSoaVector4 tmpV4_0, tmpV4_1;
-    vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathSoaV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
-{
-    VmathSoaVector4 tmpV4_0, tmpV4_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
-    vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
-    vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
-}
-
-static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec->x, vec->z );
-    tmp1 = vec_mergeh( vec->y, vec->w );
-    tmp2 = vec_mergel( vec->x, vec->z );
-    tmp3 = vec_mergel( vec->y, vec->w );
-    vmathV4MakeFrom128( result0, vec_mergeh( tmp0, tmp1 ) );
-    vmathV4MakeFrom128( result1, vec_mergel( tmp0, tmp1 ) );
-    vmathV4MakeFrom128( result2, vec_mergeh( tmp2, tmp3 ) );
-    vmathV4MakeFrom128( result3, vec_mergel( tmp2, tmp3 ) );
-}
-
-static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
-{
-    VmathVector4 v0, v1, v2, v3;
-    vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
-    twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
-    twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
-}
-
-static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
-{
-    vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
-}
-
-static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
-{
-    result->w = _w;
-}
-
-static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
-{
-    return vec->w;
-}
-
-static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = vec_add( vec0->x, vec1->x );
-    result->y = vec_add( vec0->y, vec1->y );
-    result->z = vec_add( vec0->z, vec1->z );
-    result->w = vec_add( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = vec_sub( vec0->x, vec1->x );
-    result->y = vec_sub( vec0->y, vec1->y );
-    result->z = vec_sub( vec0->z, vec1->z );
-    result->w = vec_sub( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
-{
-    result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( vec->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
-{
-    result->x = divf4( vec->x, scalar );
-    result->y = divf4( vec->y, scalar );
-    result->z = divf4( vec->z, scalar );
-    result->w = divf4( vec->w, scalar );
-}
-
-static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = negatef4( vec->x );
-    result->y = negatef4( vec->y );
-    result->z = negatef4( vec->z );
-    result->w = negatef4( vec->w );
-}
-
-static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = divf4( vec0->x, vec1->x );
-    result->y = divf4( vec0->y, vec1->y );
-    result->z = divf4( vec0->z, vec1->z );
-    result->w = divf4( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
-    result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->w );
-}
-
-static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = sqrtf4( vec->x );
-    result->y = sqrtf4( vec->y );
-    result->z = sqrtf4( vec->z );
-    result->w = sqrtf4( vec->w );
-}
-
-static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
-    result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->w ) );
-}
-
-static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = fabsf4( vec->x );
-    result->y = fabsf4( vec->y );
-    result->z = fabsf4( vec->z );
-    result->w = fabsf4( vec->w );
-}
-
-static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = copysignf4( vec0->x, vec1->x );
-    result->y = copysignf4( vec0->y, vec1->y );
-    result->z = copysignf4( vec0->z, vec1->z );
-    result->w = copysignf4( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = fmaxf4( vec0->x, vec1->x );
-    result->y = fmaxf4( vec0->y, vec1->y );
-    result->z = fmaxf4( vec0->z, vec1->z );
-    result->w = fmaxf4( vec0->w, vec1->w );
-}
-
-static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec->x, vec->y );
-    result = fmaxf4( vec->z, result );
-    result = fmaxf4( vec->w, result );
-    return result;
-}
-
-static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = fminf4( vec0->x, vec1->x );
-    result->y = fminf4( vec0->y, vec1->y );
-    result->z = fminf4( vec0->z, vec1->z );
-    result->w = fminf4( vec0->w, vec1->w );
-}
-
-static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec->x, vec->y );
-    result = fminf4( vec->z, result );
-    result = fminf4( vec->w, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = vec_add( vec->x, vec->y );
-    result = vec_add( result, vec->z );
-    result = vec_add( result, vec->w );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = vec_madd( vec->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec->w, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
-{
-    return sqrtf4( vmathSoaV4LengthSqr( vec ) );
-}
-
-static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaV4LengthSqr( vec );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    result->x = vec_madd( vec->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( vec->w, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
-{
-    result->x = vec_sel( vec0->x, vec1->x, select1 );
-    result->y = vec_sel( vec0->y, vec1->y, select1 );
-    result->z = vec_sel( vec0->z, vec1->z, select1 );
-    result->w = vec_sel( vec0->w, vec1->w, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
-{
-    VmathVector4 vec0, vec1, vec2, vec3;
-    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV4Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV4Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV4Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV4Print( &vec3 );
-}
-
-static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
-{
-    VmathVector4 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV4Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV4Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV4Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV4Print( &vec3 );
-}
-
-#endif
-
-static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
-{
-    vec_float4 vec128 = pnt->vec128;
-    result->x = vec_splat( vec128, 0 );
-    result->y = vec_splat( vec128, 1 );
-    result->z = vec_splat( vec128, 2 );
-}
-
-static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( pnt0->vec128, pnt2->vec128 );
-    tmp1 = vec_mergeh( pnt1->vec128, pnt3->vec128 );
-    tmp2 = vec_mergel( pnt0->vec128, pnt2->vec128 );
-    tmp3 = vec_mergel( pnt1->vec128, pnt3->vec128 );
-    result->x = vec_mergeh( tmp0, tmp1 );
-    result->y = vec_mergel( tmp0, tmp1 );
-    result->z = vec_mergeh( tmp2, tmp3 );
-}
-
-static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = vec_mergeh( pnt->x, pnt->z );
-    tmp1 = vec_mergel( pnt->x, pnt->z );
-    vmathP3MakeFrom128( result0, vec_mergeh( tmp0, pnt->y ) );
-    vmathP3MakeFrom128( result1, vec_perm( tmp0, pnt->y, _VECTORMATH_PERM_ZBWX ) );
-    vmathP3MakeFrom128( result2, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_XCYX ) );
-    vmathP3MakeFrom128( result3, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_ZDWX ) );
-}
-
-static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = vec_sld( yzxy, xyzx, 8 );
-    zxzx = vec_sld( xyzx, zxyz, 8 );
-    yzyz = vec_sld( zxyz, yzxy, 8 );
-    vmathSoaP3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
-    vmathSoaP3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
-    vmathSoaP3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
-}
-
-static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA );
-    zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
-    yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
-    xyzx = vec_sld( xyxy, zxzx, 8 );
-    yzxy = vec_sld( yzyz, xyxy, 8 );
-    zxyz = vec_sld( zxzx, yzyz, 8 );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathSoaP3StoreXYZArray( pnt0, xyz0 );
-    vmathSoaP3StoreXYZArray( pnt1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
-{
-    return pnt->x;
-}
-
-static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
-{
-    return pnt->y;
-}
-
-static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
-{
-    return pnt->z;
-}
-
-static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
-{
-    return *(&pnt->x + idx);
-}
-
-static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = vec_sub( pnt0->x, pnt1->x );
-    result->y = vec_sub( pnt0->y, pnt1->y );
-    result->z = vec_sub( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_add( pnt->x, vec1->x );
-    result->y = vec_add( pnt->y, vec1->y );
-    result->z = vec_add( pnt->z, vec1->z );
-}
-
-static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_sub( pnt->x, vec1->x );
-    result->y = vec_sub( pnt->y, vec1->y );
-    result->z = vec_sub( pnt->z, vec1->z );
-}
-
-static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = vec_madd( pnt0->x, pnt1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( pnt0->y, pnt1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( pnt0->z, pnt1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = divf4( pnt0->x, pnt1->x );
-    result->y = divf4( pnt0->y, pnt1->y );
-    result->z = divf4( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->x );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->y );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->z );
-}
-
-static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = sqrtf4( pnt->x );
-    result->y = sqrtf4( pnt->y );
-    result->z = sqrtf4( pnt->z );
-}
-
-static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->x ) );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->y ) );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->z ) );
-}
-
-static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = fabsf4( pnt->x );
-    result->y = fabsf4( pnt->y );
-    result->z = fabsf4( pnt->z );
-}
-
-static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = copysignf4( pnt0->x, pnt1->x );
-    result->y = copysignf4( pnt0->y, pnt1->y );
-    result->z = copysignf4( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = fmaxf4( pnt0->x, pnt1->x );
-    result->y = fmaxf4( pnt0->y, pnt1->y );
-    result->z = fmaxf4( pnt0->z, pnt1->z );
-}
-
-static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( pnt->x, pnt->y );
-    result = fmaxf4( pnt->z, result );
-    return result;
-}
-
-static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = fminf4( pnt0->x, pnt1->x );
-    result->y = fminf4( pnt0->y, pnt1->y );
-    result->z = fminf4( pnt0->z, pnt1->z );
-}
-
-static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fminf4( pnt->x, pnt->y );
-    result = fminf4( pnt->z, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = vec_add( pnt->x, pnt->y );
-    result = vec_add( result, pnt->z );
-    return result;
-}
-
-static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
-{
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
-{
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 result;
-    result = vec_madd( pnt->x, unitVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( pnt->y, unitVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( pnt->z, unitVec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathSoaV3LengthSqr( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathSoaV3Length( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathSoaV3LengthSqr( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathSoaV3Length( &tmpV3_0 );
-}
-
-static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
-{
-    result->x = vec_sel( pnt0->x, pnt1->x, select1 );
-    result->y = vec_sel( pnt0->y, pnt1->y, select1 );
-    result->z = vec_sel( pnt0->z, pnt1->z, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
-{
-    VmathPoint3 vec0, vec1, vec2, vec3;
-    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathP3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathP3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathP3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathP3Print( &vec3 );
-}
-
-static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
-{
-    VmathPoint3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathP3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathP3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathP3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathP3Print( &vec3 );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_C_H
+#define _VECTORMATH_VEC_SOA_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
+{
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
+{
+    vec_float4 vec128 = vec->vec128;
+    result->x = vec_splat( vec128, 0 );
+    result->y = vec_splat( vec128, 1 );
+    result->z = vec_splat( vec128, 2 );
+}
+
+static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 );
+    tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
+    tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
+    tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
+    result->x = vec_mergeh( tmp0, tmp1 );
+    result->y = vec_mergel( tmp0, tmp1 );
+    result->z = vec_mergeh( tmp2, tmp3 );
+}
+
+static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
+    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathSoaV3Add( result, vec0, &tmpV3_1 );
+}
+
+static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
+    vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
+    vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
+}
+
+static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
+{
+    vec_float4 tmp0, tmp1;
+    tmp0 = vec_mergeh( vec->x, vec->z );
+    tmp1 = vec_mergel( vec->x, vec->z );
+    vmathV3MakeFrom128( result0, vec_mergeh( tmp0, vec->y ) );
+    vmathV3MakeFrom128( result1, vec_perm( tmp0, vec->y, _VECTORMATH_PERM_ZBWX ) );
+    vmathV3MakeFrom128( result2, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_XCYX ) );
+    vmathV3MakeFrom128( result3, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_ZDWX ) );
+}
+
+static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
+{
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyxy = vec_sld( yzxy, xyzx, 8 );
+    zxzx = vec_sld( xyzx, zxyz, 8 );
+    yzyz = vec_sld( zxyz, yzxy, 8 );
+    vmathSoaV3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
+    vmathSoaV3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
+    vmathSoaV3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
+}
+
+static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA );
+    zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
+    yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
+    xyzx = vec_sld( xyxy, zxzx, 8 );
+    yzxy = vec_sld( yzyz, xyxy, 8 );
+    zxyz = vec_sld( zxzx, yzyz, 8 );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathSoaV3StoreXYZArray( vec0, xyz0 );
+    vmathSoaV3StoreXYZArray( vec1, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
+{
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
+{
+    return vec->x;
+}
+
+static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
+{
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
+{
+    return vec->y;
+}
+
+static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
+{
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
+{
+    return vec->z;
+}
+
+static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
+{
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
+{
+    return *(&vec->x + idx);
+}
+
+static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = vec_add( vec0->x, vec1->x );
+    result->y = vec_add( vec0->y, vec1->y );
+    result->z = vec_add( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = vec_sub( vec0->x, vec1->x );
+    result->y = vec_sub( vec0->y, vec1->y );
+    result->z = vec_sub( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
+{
+    result->x = vec_add( vec->x, pnt1->x );
+    result->y = vec_add( vec->y, pnt1->y );
+    result->z = vec_add( vec->z, pnt1->z );
+}
+
+static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
+{
+    result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
+{
+    result->x = divf4( vec->x, scalar );
+    result->y = divf4( vec->y, scalar );
+    result->z = divf4( vec->z, scalar );
+}
+
+static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{ /* Apply negatef4 (helper defined elsewhere) to x, y and z of vec and store the results in *result. */
+    result->x = negatef4( vec->x );
+    result->y = negatef4( vec->y );
+    result->z = negatef4( vec->z );
+}
+
+static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{ /* Component-wise product: result.c = vec0.c * vec1.c for c in x, y, z (vec_madd with a zero addend). */
+    result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{ /* Component-wise quotient: result.c = divf4( vec0.c, vec1.c ) for c in x, y, z; zero divisors are not checked here. */
+    result->x = divf4( vec0->x, vec1->x );
+    result->y = divf4( vec0->y, vec1->y );
+    result->z = divf4( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{ /* Component-wise reciprocal: result.c = divf4( 1, vec.c ) for c in x, y, z. */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
+}
+
+static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{ /* Apply sqrtf4 (helper defined elsewhere) to x, y and z of vec and store the results in *result. */
+    result->x = sqrtf4( vec->x );
+    result->y = sqrtf4( vec->y );
+    result->z = sqrtf4( vec->z );
+}
+
+static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{ /* Component-wise reciprocal square root, computed as divf4( 1, sqrtf4( vec.c ) ) for c in x, y, z. */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
+}
+
+static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{ /* Apply fabsf4 (helper defined elsewhere) to x, y and z of vec and store the results in *result. */
+    result->x = fabsf4( vec->x );
+    result->y = fabsf4( vec->y );
+    result->z = fabsf4( vec->z );
+}
+
+static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{ /* result.c = copysignf4( vec0.c, vec1.c ) for c in x, y, z; presumably magnitude of vec0 with sign of vec1, as with C copysignf - TODO confirm against copysignf4. */
+    result->x = copysignf4( vec0->x, vec1->x );
+    result->y = copysignf4( vec0->y, vec1->y );
+    result->z = copysignf4( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{ /* Component-wise maximum: result.c = fmaxf4( vec0.c, vec1.c ) for c in x, y, z. */
+    result->x = fmaxf4( vec0->x, vec1->x );
+    result->y = fmaxf4( vec0->y, vec1->y );
+    result->z = fmaxf4( vec0->z, vec1->z );
+}
+
+static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
+{ /* Reduce across components: returns fmaxf4( z, fmaxf4( x, y ) ), one value per SIMD lane. */
+    vec_float4 result;
+    result = fmaxf4( vec->x, vec->y );
+    result = fmaxf4( vec->z, result );
+    return result;
+}
+
+static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{ /* Component-wise minimum: result.c = fminf4( vec0.c, vec1.c ) for c in x, y, z. */
+    result->x = fminf4( vec0->x, vec1->x );
+    result->y = fminf4( vec0->y, vec1->y );
+    result->z = fminf4( vec0->z, vec1->z );
+}
+
+static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
+{ /* Reduce across components: returns fminf4( z, fminf4( x, y ) ), one value per SIMD lane. */
+    vec_float4 result;
+    result = fminf4( vec->x, vec->y );
+    result = fminf4( vec->z, result );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
+{ /* Return (x + y) + z, added lane by lane with vec_add. */
+    vec_float4 result;
+    result = vec_add( vec->x, vec->y );
+    result = vec_add( result, vec->z );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{ /* Dot product per SIMD lane: x0*x1 + y0*y1 + z0*z1. */
+    vec_float4 result;
+    result = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* each product is formed with a zero addend and then added separately rather than fused into the running sum */
+    result = vec_add( result, vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
+{ /* Squared length per SIMD lane: x*x + y*y + z*z. */
+    vec_float4 result;
+    result = vec_madd( vec->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( vec->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( vec->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
+{ /* Return sqrtf4 of the squared length computed by vmathSoaV3LengthSqr. */
+    return sqrtf4( vmathSoaV3LengthSqr( vec ) );
+}
+
+static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{ /* Scale vec by the reciprocal of its length and store it in *result; there is no guard for zero-length input. */
+    vec_float4 lenSqr, lenInv;
+    lenSqr = vmathSoaV3LengthSqr( vec );
+    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) ); /* 1 / length, computed once and reused for all three components */
+    result->x = vec_madd( vec->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{ /* Cross product vec0 x vec1 per SIMD lane; all three components are computed into temporaries before *result is written. */
+    vec_float4 tmpX, tmpY, tmpZ;
+    tmpX = vec_sub( vec_madd( vec0->y, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->z, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* y0*z1 - z0*y1 */
+    tmpY = vec_sub( vec_madd( vec0->z, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->x, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* z0*x1 - x0*z1 */
+    tmpZ = vec_sub( vec_madd( vec0->x, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->y, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* x0*y1 - y0*x1 */
+    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
+{ /* Bitwise select per component: vec_sel keeps vec0 bits where a select1 bit is 0 and takes vec1 bits where it is 1; the same mask is used for x, y and z. */
+    result->x = vec_sel( vec0->x, vec1->x, select1 );
+    result->y = vec_sel( vec0->y, vec1->y, select1 );
+    result->z = vec_sel( vec0->z, vec1->z, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
+{ /* Debug helper: unpack *vec into four VmathVector3 values with vmathSoaV3Get4Aos and print each one under a "slot N:" heading. */
+    VmathVector3 vec0, vec1, vec2, vec3;
+    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathV3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV3Print( &vec3 );
+}
+
+static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
+{ /* Debug helper: print name followed by a colon, then the four unpacked VmathVector3 values, each under a "slot N:" heading. */
+    VmathVector3 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathV3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV3Print( &vec3 );
+}
+
+#endif
+
+static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{ /* Copy the x, y, z and w members of *vec into *result. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{ /* Initialise *result from four vec_float4 values, one per component. */
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
+{ /* Build *result by setting x, y, z from xyz (vmathSoaV4SetXYZ) and w from _w (vmathSoaV4SetW). */
+    vmathSoaV4SetXYZ( result, xyz );
+    vmathSoaV4SetW( result, _w );
+}
+
+static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
+{ /* Copy x, y, z from the 3-D vector and set w to 0 in every lane. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+}
+
+static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
+{ /* Copy x, y, z from the 3-D point and set w to 1 in every lane. */
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+    result->w = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+}
+
+static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
+{ /* Copy the x, y, z and w members of the quaternion into *result unchanged. */
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
+{ /* Set all four components of *result to the same vec_float4 value. */
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
+{ /* Replicate one AoS vector into all four lanes: each component is element 0..3 of vec->vec128 splatted across a vec_float4. */
+    vec_float4 vec128 = vec->vec128;
+    result->x = vec_splat( vec128, 0 );
+    result->y = vec_splat( vec128, 1 );
+    result->z = vec_splat( vec128, 2 );
+    result->w = vec_splat( vec128, 3 );
+}
+
+static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
+{ /* Transpose four AoS vectors into SoA form: lane i of each result component comes from vec<i>. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 ); /* first pass interleaves vec0 with vec2 and vec1 with vec3 (high halves, then low halves) */
+    tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
+    tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
+    tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
+    result->x = vec_mergeh( tmp0, tmp1 ); /* second pass interleaves the intermediates, completing the 4x4 transpose */
+    result->y = vec_mergel( tmp0, tmp1 );
+    result->z = vec_mergeh( tmp2, tmp3 );
+    result->w = vec_mergel( tmp2, tmp3 );
+}
+
+static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
+{ /* Set *result to (1, 0, 0, 0) in every lane. */
+    vmathSoaV4MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
+{ /* Set *result to (0, 1, 0, 0) in every lane. */
+    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
+{ /* Set *result to (0, 0, 1, 0) in every lane. */
+    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
+{ /* Set *result to (0, 0, 0, 1) in every lane. */
+    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* Linear interpolation: result = vec0 + (vec1 - vec0) * t, with t applied lane by lane; t is not clamped. */
+    VmathSoaVector4 tmpV4_0, tmpV4_1;
+    vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
+    vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
+    vmathSoaV4Add( result, vec0, &tmpV4_1 );
+}
+
+static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
+{ /* Spherical interpolation between unitVec0 and unitVec1 by t, per SIMD lane; lanes whose dot product is not below _VECTORMATH_SLERP_TOL fall back to linear weights. */
+    VmathSoaVector4 tmpV4_0, tmpV4_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle ); /* literal is parenthesized like every other one in this file so its commas cannot split the arguments of a function-like vec_cmpgt macro; mask is set where TOL > cosAngle */
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) ); /* computed for every lane; lanes with a clear mask discard it in the selects below */
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask ); /* weight of unitVec0: sin((1-t)*angle)/sin(angle) where the mask is set, else 1-t */
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask ); /* weight of unitVec1: sin(t*angle)/sin(angle) where the mask is set, else t */
+    vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
+    vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
+    vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
+}
+
+static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
+{ /* Transpose SoA data back into four AoS vectors: result<i> receives lane i of x, y, z and w. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( vec->x, vec->z ); /* first pass interleaves x with z and y with w (high halves, then low halves) */
+    tmp1 = vec_mergeh( vec->y, vec->w );
+    tmp2 = vec_mergel( vec->x, vec->z );
+    tmp3 = vec_mergel( vec->y, vec->w );
+    vmathV4MakeFrom128( result0, vec_mergeh( tmp0, tmp1 ) ); /* second pass interleaves the intermediates so each quad is one vector's x, y, z, w */
+    vmathV4MakeFrom128( result1, vec_mergel( tmp0, tmp1 ) );
+    vmathV4MakeFrom128( result2, vec_mergeh( tmp2, tmp3 ) );
+    vmathV4MakeFrom128( result3, vec_mergel( tmp2, tmp3 ) );
+}
+
+static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
+{ /* Unpack *vec into four AoS vectors and write them, two per quad, to twoQuads[0] and twoQuads[1] via _vmath2VfToHalfFloats (helper defined elsewhere). */
+    VmathVector4 v0, v1, v2, v3;
+    vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
+    twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
+    twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
+}
+
+static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
+{ /* Overwrite x, y and z of *result from the 3-D vector; w is left untouched. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
+{ /* Build the 3-D vector *result from the x, y and z members of *vec; w is ignored. */
+    vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
+}
+
+static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
+{ /* Store _x into the x member of *result; the other members are left untouched. */
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
+{ /* Return the x member of *vec by value. */
+    return vec->x;
+}
+
+static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
+{ /* Store _y into the y member of *result; the other members are left untouched. */
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
+{ /* Return the y member of *vec by value. */
+    return vec->y;
+}
+
+static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
+{ /* Store _z into the z member of *result; the other members are left untouched. */
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
+{ /* Return the z member of *vec by value. */
+    return vec->z;
+}
+
+static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
+{ /* Store _w into the w member of *result; the other members are left untouched. */
+    result->w = _w;
+}
+
+static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
+{ /* Return the w member of *vec by value. */
+    return vec->w;
+}
+
+static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
+{ /* Write value to the idx-th vec_float4 counted from the x member (idx 0 is x); idx is not range-checked. */
+    *(&result->x + idx) = value; /* pointer arithmetic from &x: assumes the other members follow x contiguously (struct is declared outside this view) */
+}
+
+static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
+{ /* Return the idx-th vec_float4 counted from the x member (idx 0 is x); idx is not range-checked. */
+    return *(&vec->x + idx); /* pointer arithmetic from &x: assumes the other members follow x contiguously (struct is declared outside this view) */
+}
+
+static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* Component-wise sum: result = vec0 + vec1, applied to x, y, z and w with vec_add. */
+    result->x = vec_add( vec0->x, vec1->x );
+    result->y = vec_add( vec0->y, vec1->y );
+    result->z = vec_add( vec0->z, vec1->z );
+    result->w = vec_add( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* Component-wise difference: result = vec0 - vec1, applied to x, y, z and w with vec_sub. */
+    result->x = vec_sub( vec0->x, vec1->x );
+    result->y = vec_sub( vec0->y, vec1->y );
+    result->z = vec_sub( vec0->z, vec1->z );
+    result->w = vec_sub( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
+{ /* Multiply x, y, z and w of vec by scalar lane by lane; vec_madd with an all-zero addend acts as a plain multiply. */
+    result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->w = vec_madd( vec->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
+{ /* Pass each of x, y, z, w together with scalar to divf4 (helper defined elsewhere); zero lanes in scalar are not checked here. */
+    result->x = divf4( vec->x, scalar );
+    result->y = divf4( vec->y, scalar );
+    result->z = divf4( vec->z, scalar );
+    result->w = divf4( vec->w, scalar );
+}
+
+static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{ /* Apply negatef4 (helper defined elsewhere) to x, y, z and w of vec and store the results in *result. */
+    result->x = negatef4( vec->x );
+    result->y = negatef4( vec->y );
+    result->z = negatef4( vec->z );
+    result->w = negatef4( vec->w );
+}
+
+static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* Component-wise product: result.c = vec0.c * vec1.c for c in x, y, z, w (vec_madd with a zero addend). */
+    result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->w = vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* Component-wise quotient: result.c = divf4( vec0.c, vec1.c ) for c in x, y, z, w; zero divisors are not checked here. */
+    result->x = divf4( vec0->x, vec1->x );
+    result->y = divf4( vec0->y, vec1->y );
+    result->z = divf4( vec0->z, vec1->z );
+    result->w = divf4( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{ /* Component-wise reciprocal: result.c = divf4( 1, vec.c ) for c in x, y, z, w. */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
+    result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->w );
+}
+
+static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{ /* Apply sqrtf4 (helper defined elsewhere) to x, y, z and w of vec and store the results in *result. */
+    result->x = sqrtf4( vec->x );
+    result->y = sqrtf4( vec->y );
+    result->z = sqrtf4( vec->z );
+    result->w = sqrtf4( vec->w );
+}
+
+static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{ /* Component-wise reciprocal square root, computed as divf4( 1, sqrtf4( vec.c ) ) for c in x, y, z, w. */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
+    result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->w ) );
+}
+
+static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{ /* Apply fabsf4 (helper defined elsewhere) to x, y, z and w of vec and store the results in *result. */
+    result->x = fabsf4( vec->x );
+    result->y = fabsf4( vec->y );
+    result->z = fabsf4( vec->z );
+    result->w = fabsf4( vec->w );
+}
+
+static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* result.c = copysignf4( vec0.c, vec1.c ) for c in x, y, z, w; presumably magnitude of vec0 with sign of vec1, as with C copysignf - TODO confirm against copysignf4. */
+    result->x = copysignf4( vec0->x, vec1->x );
+    result->y = copysignf4( vec0->y, vec1->y );
+    result->z = copysignf4( vec0->z, vec1->z );
+    result->w = copysignf4( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* Component-wise maximum: result.c = fmaxf4( vec0.c, vec1.c ) for c in x, y, z, w. */
+    result->x = fmaxf4( vec0->x, vec1->x );
+    result->y = fmaxf4( vec0->y, vec1->y );
+    result->z = fmaxf4( vec0->z, vec1->z );
+    result->w = fmaxf4( vec0->w, vec1->w );
+}
+
+static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
+{ /* Reduce across components with fmaxf4: folds x, y, then z, then w into one value per SIMD lane. */
+    vec_float4 result;
+    result = fmaxf4( vec->x, vec->y );
+    result = fmaxf4( vec->z, result );
+    result = fmaxf4( vec->w, result );
+    return result;
+}
+
+static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* Component-wise minimum: result.c = fminf4( vec0.c, vec1.c ) for c in x, y, z, w. */
+    result->x = fminf4( vec0->x, vec1->x );
+    result->y = fminf4( vec0->y, vec1->y );
+    result->z = fminf4( vec0->z, vec1->z );
+    result->w = fminf4( vec0->w, vec1->w );
+}
+
+static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
+{ /* Reduce across components with fminf4: folds x, y, then z, then w into one value per SIMD lane. */
+    vec_float4 result;
+    result = fminf4( vec->x, vec->y );
+    result = fminf4( vec->z, result );
+    result = fminf4( vec->w, result );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
+{ /* Return ((x + y) + z) + w, added lane by lane with vec_add. */
+    vec_float4 result;
+    result = vec_add( vec->x, vec->y );
+    result = vec_add( result, vec->z );
+    result = vec_add( result, vec->w );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{ /* Dot product per SIMD lane: x0*x1 + y0*y1 + z0*z1 + w0*w1. */
+    vec_float4 result;
+    result = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* each product is formed with a zero addend and then added separately rather than fused into the running sum */
+    result = vec_add( result, vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
+{ /* Squared length per SIMD lane: x*x + y*y + z*z + w*w. */
+    vec_float4 result;
+    result = vec_madd( vec->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( vec->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( vec->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( vec->w, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
+{ /* Return sqrtf4 of the squared length computed by vmathSoaV4LengthSqr. */
+    return sqrtf4( vmathSoaV4LengthSqr( vec ) );
+}
+
+static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{ /* Scale vec by the reciprocal of its 4-component length and store it in *result; there is no guard for zero-length input. */
+    vec_float4 lenSqr, lenInv;
+    lenSqr = vmathSoaV4LengthSqr( vec );
+    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) ); /* 1 / length, computed once and reused for all four components */
+    result->x = vec_madd( vec->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->w = vec_madd( vec->w, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
+{ /* Bitwise select per component: vec_sel keeps vec0 bits where a select1 bit is 0 and takes vec1 bits where it is 1; the same mask is used for x, y, z and w. */
+    result->x = vec_sel( vec0->x, vec1->x, select1 );
+    result->y = vec_sel( vec0->y, vec1->y, select1 );
+    result->z = vec_sel( vec0->z, vec1->z, select1 );
+    result->w = vec_sel( vec0->w, vec1->w, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
+{ /* Debug helper: unpack *vec into four VmathVector4 values with vmathSoaV4Get4Aos and print each one under a "slot N:" heading. */
+    VmathVector4 vec0, vec1, vec2, vec3;
+    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathV4Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV4Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV4Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV4Print( &vec3 );
+}
+
+static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
+{ /* Debug helper: print name followed by a colon, then the four unpacked VmathVector4 values, each under a "slot N:" heading. */
+    VmathVector4 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathV4Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV4Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV4Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV4Print( &vec3 );
+}
+
+#endif
+
+static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{ /* Copy the x, y and z members of *pnt into *result. */
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{ /* Initialise *result from three vec_float4 values, one per component. */
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
+{ /* Reinterpret a 3-D vector as a point by copying its x, y and z members unchanged. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
+{ /* Set all three components of *result to the same vec_float4 value. */
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
+{ /* Replicate one AoS point into all four lanes: each component is element 0..2 of pnt->vec128 splatted across a vec_float4; element 3 is not read. */
+    vec_float4 vec128 = pnt->vec128;
+    result->x = vec_splat( vec128, 0 );
+    result->y = vec_splat( vec128, 1 );
+    result->z = vec_splat( vec128, 2 );
+}
+
+static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
+{ /* Transpose four AoS points into SoA form: lane i of each result component comes from pnt<i>; the fourth word of each input is dropped. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( pnt0->vec128, pnt2->vec128 ); /* first pass interleaves pnt0 with pnt2 and pnt1 with pnt3 (high halves, then low halves) */
+    tmp1 = vec_mergeh( pnt1->vec128, pnt3->vec128 );
+    tmp2 = vec_mergel( pnt0->vec128, pnt2->vec128 );
+    tmp3 = vec_mergel( pnt1->vec128, pnt3->vec128 );
+    result->x = vec_mergeh( tmp0, tmp1 ); /* second pass gathers all x, all y and all z values; the would-be w row is never formed */
+    result->y = vec_mergel( tmp0, tmp1 );
+    result->z = vec_mergeh( tmp2, tmp3 );
+}
+
+static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* Linear interpolation between points: result = pnt0 + (pnt1 - pnt0) * t, with t applied lane by lane; t is not clamped. */
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
+    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
+}
+
+static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
+{ /* Unpack SoA data into four AoS points, one per SIMD lane, built from merged x/z pairs combined with y. */
+    vec_float4 tmp0, tmp1;
+    tmp0 = vec_mergeh( pnt->x, pnt->z ); /* x0 z0 x1 z1 */
+    tmp1 = vec_mergel( pnt->x, pnt->z ); /* x2 z2 x3 z3 */
+    vmathP3MakeFrom128( result0, vec_mergeh( tmp0, pnt->y ) ); /* x0 y0 z0 y1 - the fourth word is a leftover, not a meaningful component */
+    vmathP3MakeFrom128( result1, vec_perm( tmp0, pnt->y, _VECTORMATH_PERM_ZBWX ) ); /* NOTE(review): permute masks are defined outside this view; presumably this yields x1 y1 z1 in the first three words - confirm */
+    vmathP3MakeFrom128( result2, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_XCYX ) ); /* presumably x2 y2 z2 in the first three words - confirm against the mask definition */
+    vmathP3MakeFrom128( result3, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_ZDWX ) ); /* presumably x3 y3 z3 in the first three words - confirm against the mask definition */
+}
+
+static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
+{ /* Read threeQuads[0..2] and de-interleave them into the x, y, z members of *vec; local names suggest the quads hold packed x y z x | y z x y | z x y z data - TODO confirm with callers. */
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyxy = vec_sld( yzxy, xyzx, 8 ); /* vec_sld by 8 bytes: last two words of the first operand followed by the first two words of the second */
+    zxzx = vec_sld( xyzx, zxyz, 8 );
+    yzyz = vec_sld( zxyz, yzxy, 8 );
+    vmathSoaP3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) ); /* NOTE(review): permute masks are defined outside this view; presumably each gathers one component in lane order - confirm */
+    vmathSoaP3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
+    vmathSoaP3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
+}
+
+static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
+{ /* Interleave the x, y, z members of *vec and write the result to threeQuads[0..2]; local names suggest a packed x y z x | y z x y | z x y z layout - TODO confirm with callers. */
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA ); /* NOTE(review): permute masks are defined outside this view; presumably each pairs two components from two lanes - confirm */
+    zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
+    yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
+    xyzx = vec_sld( xyxy, zxzx, 8 ); /* vec_sld by 8 bytes: last two words of the first operand followed by the first two words of the second */
+    yzxy = vec_sld( yzyz, xyxy, 8 );
+    zxyz = vec_sld( zxzx, yzyz, 8 );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
+{ /* Pack both SoA points with vmathSoaP3StoreXYZArray (three float quads each), then write the six quads pairwise to threeQuads[0..2] via _vmath2VfToHalfFloats (helper defined elsewhere). */
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathSoaP3StoreXYZArray( pnt0, xyz0 );
+    vmathSoaP3StoreXYZArray( pnt1, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]); /* middle output quad straddles the two inputs: last quad of pnt0, first quad of pnt1 */
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
+{ /* Store _x into the x member of *result; y and z are left untouched. */
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
+{ /* Return the x member of *pnt by value. */
+    return pnt->x;
+}
+
+static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
+{ /* Store _y into the y member of *result; x and z are left untouched. */
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
+{ /* Return the y member of *pnt by value. */
+    return pnt->y;
+}
+
+static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
+{ /* Store _z into the z member of *result; x and y are left untouched. */
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
+{ /* Return the z member of *pnt by value. */
+    return pnt->z;
+}
+
+static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
+{ /* Write value to the idx-th vec_float4 counted from the x member (idx 0 is x); idx is not range-checked. */
+    *(&result->x + idx) = value; /* pointer arithmetic from &x: assumes the other members follow x contiguously (struct is declared outside this view) */
+}
+
+static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
+{ /* Return the idx-th vec_float4 counted from the x member (idx 0 is x); idx is not range-checked. */
+    return *(&pnt->x + idx); /* pointer arithmetic from &x: assumes the other members follow x contiguously (struct is declared outside this view) */
+}
+
+static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* Difference of two points, pnt0 - pnt1, written component by component to the vector *result. */
+    result->x = vec_sub( pnt0->x, pnt1->x );
+    result->y = vec_sub( pnt0->y, pnt1->y );
+    result->z = vec_sub( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
+{ /* Offset a point by a vector: result = pnt + vec1, component by component. */
+    result->x = vec_add( pnt->x, vec1->x );
+    result->y = vec_add( pnt->y, vec1->y );
+    result->z = vec_add( pnt->z, vec1->z );
+}
+
+static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
+{ /* Offset a point by the negative of a vector: result = pnt - vec1, component by component. */
+    result->x = vec_sub( pnt->x, vec1->x );
+    result->y = vec_sub( pnt->y, vec1->y );
+    result->z = vec_sub( pnt->z, vec1->z );
+}
+
+static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* Component-wise product: result.c = pnt0.c * pnt1.c for c in x, y, z (vec_madd with a zero addend). */
+    result->x = vec_madd( pnt0->x, pnt1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( pnt0->y, pnt1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( pnt0->z, pnt1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* Component-wise quotient: result.c = divf4( pnt0.c, pnt1.c ) for c in x, y, z; zero divisors are not checked here. */
+    result->x = divf4( pnt0->x, pnt1->x );
+    result->y = divf4( pnt0->y, pnt1->y );
+    result->z = divf4( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{ /* Component-wise reciprocal: result.c = divf4( 1, pnt.c ) for c in x, y, z. */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->x );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->y );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->z );
+}
+
+static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{ /* Apply sqrtf4 (helper defined elsewhere) to x, y and z of pnt and store the results in *result. */
+    result->x = sqrtf4( pnt->x );
+    result->y = sqrtf4( pnt->y );
+    result->z = sqrtf4( pnt->z );
+}
+
+static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{ /* Component-wise reciprocal square root, computed as divf4( 1, sqrtf4( pnt.c ) ) for c in x, y, z. */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->x ) );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->y ) );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->z ) );
+}
+
+static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{ /* Apply fabsf4 (helper defined elsewhere) to x, y and z of pnt and store the results in *result. */
+    result->x = fabsf4( pnt->x );
+    result->y = fabsf4( pnt->y );
+    result->z = fabsf4( pnt->z );
+}
+
+static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* result.c = copysignf4( pnt0.c, pnt1.c ) for c in x, y, z; presumably magnitude of pnt0 with sign of pnt1, as with C copysignf - TODO confirm against copysignf4. */
+    result->x = copysignf4( pnt0->x, pnt1->x );
+    result->y = copysignf4( pnt0->y, pnt1->y );
+    result->z = copysignf4( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* Component-wise maximum: result.c = fmaxf4( pnt0.c, pnt1.c ) for c in x, y, z. */
+    result->x = fmaxf4( pnt0->x, pnt1->x );
+    result->y = fmaxf4( pnt0->y, pnt1->y );
+    result->z = fmaxf4( pnt0->z, pnt1->z );
+}
+
+static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
+{ /* Reduce across components: returns fmaxf4( z, fmaxf4( x, y ) ), one value per SIMD lane. */
+    vec_float4 result;
+    result = fmaxf4( pnt->x, pnt->y );
+    result = fmaxf4( pnt->z, result );
+    return result;
+}
+
+static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* Component-wise minimum: result.c = fminf4( pnt0.c, pnt1.c ) for c in x, y, z. */
+    result->x = fminf4( pnt0->x, pnt1->x );
+    result->y = fminf4( pnt0->y, pnt1->y );
+    result->z = fminf4( pnt0->z, pnt1->z );
+}
+
+static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
+{ /* Reduce across components: returns fminf4( z, fminf4( x, y ) ), one value per SIMD lane. */
+    vec_float4 result;
+    result = fminf4( pnt->x, pnt->y );
+    result = fminf4( pnt->z, result );
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
+{ /* Return (x + y) + z, added lane by lane with vec_add. */
+    vec_float4 result;
+    result = vec_add( pnt->x, pnt->y );
+    result = vec_add( result, pnt->z );
+    return result;
+}
+
+static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
+{ /* Uniform scale: multiply x, y and z of pnt by scaleVal, implemented as a per-element multiply by a point whose components all equal scaleVal. */
+    VmathSoaPoint3 tmpP3_0;
+    vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );
+    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
+}
+
+static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
+{ /* Per-axis scale: multiply each component of pnt by the matching component of scaleVec (copied into a temporary point first). */
+    VmathSoaPoint3 tmpP3_0;
+    vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );
+    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
+}
+
+static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
+{ /* Return px*ux + py*uy + pz*uz per SIMD lane, i.e. the dot product of the point's coordinates with unitVec; unitVec is not normalised here. */
+    vec_float4 result;
+    result = vec_madd( pnt->x, unitVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( pnt->y, unitVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( pnt->z, unitVec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
+{ /* Convert the point to a vector with vmathSoaV3MakeFromP3 (defined elsewhere) and return that vector's squared length. */
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
+    return vmathSoaV3LengthSqr( &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
+{ /* Convert the point to a vector with vmathSoaV3MakeFromP3 (defined elsewhere) and return that vector's length. */
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
+    return vmathSoaV3Length( &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* Squared distance between two points: the squared length of pnt1 - pnt0, per SIMD lane. */
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
+    return vmathSoaV3LengthSqr( &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{ /* Distance between two points: the length of pnt1 - pnt0, per SIMD lane. */
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
+    return vmathSoaV3Length( &tmpV3_0 );
+}
+
+static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
+{ /* Bitwise select per component: vec_sel keeps pnt0 bits where a select1 bit is 0 and takes pnt1 bits where it is 1; the same mask is used for x, y and z. */
+    result->x = vec_sel( pnt0->x, pnt1->x, select1 );
+    result->y = vec_sel( pnt0->y, pnt1->y, select1 );
+    result->z = vec_sel( pnt0->z, pnt1->z, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
+{ /* Debug helper: unpack *pnt into four VmathPoint3 values with vmathSoaP3Get4Aos and print each one under a "slot N:" heading. */
+    VmathPoint3 vec0, vec1, vec2, vec3;
+    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathP3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathP3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathP3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathP3Print( &vec3 );
+}
+
+static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
+{ /* Debug helper: print name followed by a colon, then the four unpacked VmathPoint3 values, each under a "slot N:" heading. */
+    VmathPoint3 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathP3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathP3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathP3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathP3Print( &vec3 );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa_v.h
index 57805b654..75d7bb6bc 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa_v.h
@@ -1,958 +1,958 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_V_C_H
-#define _VECTORMATH_VEC_SOA_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromAos(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
-{
-    vmathSoaV3Get4Aos(&vec, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
-{
-    vmathSoaV3LoadXYZArray(vec, threeQuads);
-}
-
-static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads )
-{
-    vmathSoaV3StoreXYZArray(&vec, threeQuads);
-}
-
-static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads )
-{
-    vmathSoaV3StoreHalfFloats(&vec0, &vec1, threeQuads);
-}
-
-static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 _x )
-{
-    vmathSoaV3SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetX(&vec);
-}
-
-static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 _y )
-{
-    vmathSoaV3SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetY(&vec);
-}
-
-static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 _z )
-{
-    vmathSoaV3SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetZ(&vec);
-}
-
-static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value )
-{
-    vmathSoaV3SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx )
-{
-    return vmathSoaV3GetElem(&vec, idx);
-}
-
-static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3MaxElem(&vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3MinElem(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3Sum(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    return vmathSoaV3Dot(&vec0, &vec1);
-}
-
-static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3LengthSqr(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3Length(&vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV3Print_V( VmathSoaVector3 vec )
-{
-    vmathSoaV3Print(&vec);
-}
-
-static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name )
-{
-    vmathSoaV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromAos(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
-{
-    vmathSoaV4Get4Aos(&vec, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads )
-{
-    vmathSoaV4StoreHalfFloats(&vec, twoQuads);
-}
-
-static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec )
-{
-    vmathSoaV4SetXYZ(result, &vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 _x )
-{
-    vmathSoaV4SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetX(&vec);
-}
-
-static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 _y )
-{
-    vmathSoaV4SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetY(&vec);
-}
-
-static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 _z )
-{
-    vmathSoaV4SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetZ(&vec);
-}
-
-static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 _w )
-{
-    vmathSoaV4SetW(result, _w);
-}
-
-static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetW(&vec);
-}
-
-static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value )
-{
-    vmathSoaV4SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx )
-{
-    return vmathSoaV4GetElem(&vec, idx);
-}
-
-static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4MaxElem(&vec);
-}
-
-static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4MinElem(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4Sum(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    return vmathSoaV4Dot(&vec0, &vec1);
-}
-
-static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4LengthSqr(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4Length(&vec);
-}
-
-static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV4Print_V( VmathSoaVector4 vec )
-{
-    vmathSoaV4Print(&vec);
-}
-
-static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name )
-{
-    vmathSoaV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromAos(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFrom4Aos(&result, &pnt0, &pnt1, &pnt2, &pnt3);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
-{
-    vmathSoaP3Get4Aos(&pnt, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
-{
-    vmathSoaP3LoadXYZArray(vec, threeQuads);
-}
-
-static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 vec, vec_float4 *threeQuads )
-{
-    vmathSoaP3StoreXYZArray(&vec, threeQuads);
-}
-
-static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads )
-{
-    vmathSoaP3StoreHalfFloats(&pnt0, &pnt1, threeQuads);
-}
-
-static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 _x )
-{
-    vmathSoaP3SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetX(&pnt);
-}
-
-static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 _y )
-{
-    vmathSoaP3SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetY(&pnt);
-}
-
-static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 _z )
-{
-    vmathSoaP3SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetZ(&pnt);
-}
-
-static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value )
-{
-    vmathSoaP3SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx )
-{
-    return vmathSoaP3GetElem(&pnt, idx);
-}
-
-static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3MaxElem(&pnt);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3MinElem(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3Sum(&pnt);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec )
-{
-    return vmathSoaP3Projection(&pnt, &unitVec);
-}
-
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3DistSqrFromOrigin(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3DistFromOrigin(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    return vmathSoaP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    return vmathSoaP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt )
-{
-    vmathSoaP3Print(&pnt);
-}
-
-static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name )
-{
-    vmathSoaP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_V_C_H
+#define _VECTORMATH_VEC_SOA_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_PERM_X 0x00010203 /* byte indices 0x00-0x03: word x */
+#define _VECTORMATH_PERM_Y 0x04050607 /* byte indices 0x04-0x07: word y */
+#define _VECTORMATH_PERM_Z 0x08090a0b /* byte indices 0x08-0x0b: word z */
+#define _VECTORMATH_PERM_W 0x0c0d0e0f /* byte indices 0x0c-0x0f: word w */
+#define _VECTORMATH_PERM_A 0x10111213 /* byte indices 0x10-0x13: word a */
+#define _VECTORMATH_PERM_B 0x14151617 /* byte indices 0x14-0x17: word b */
+#define _VECTORMATH_PERM_C 0x18191a1b /* byte indices 0x18-0x1b: word c */
+#define _VECTORMATH_PERM_D 0x1c1d1e1f /* byte indices 0x1c-0x1f: word d */
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromAos(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
+{
+    vmathSoaV3Get4Aos(&vec, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
+{
+    vmathSoaV3LoadXYZArray(vec, threeQuads);
+}
+
+static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads )
+{
+    vmathSoaV3StoreXYZArray(&vec, threeQuads);
+}
+
+static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads )
+{
+    vmathSoaV3StoreHalfFloats(&vec0, &vec1, threeQuads);
+}
+
+static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 _x )
+{
+    vmathSoaV3SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetX(&vec);
+}
+
+static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 _y )
+{
+    vmathSoaV3SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetY(&vec);
+}
+
+static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 _z )
+{
+    vmathSoaV3SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetZ(&vec);
+}
+
+static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value )
+{
+    vmathSoaV3SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx )
+{
+    return vmathSoaV3GetElem(&vec, idx);
+}
+
+static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3MaxElem(&vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3MinElem(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3Sum(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    return vmathSoaV3Dot(&vec0, &vec1);
+}
+
+static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3LengthSqr(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3Length(&vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV3Print_V( VmathSoaVector3 vec )
+{
+    vmathSoaV3Print(&vec);
+}
+
+static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name )
+{
+    vmathSoaV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromAos(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeWAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
+{
+    vmathSoaV4Get4Aos(&vec, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads )
+{
+    vmathSoaV4StoreHalfFloats(&vec, twoQuads);
+}
+
+static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec )
+{
+    vmathSoaV4SetXYZ(result, &vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 _x )
+{
+    vmathSoaV4SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetX(&vec);
+}
+
+static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 _y )
+{
+    vmathSoaV4SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetY(&vec);
+}
+
+static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 _z )
+{
+    vmathSoaV4SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetZ(&vec);
+}
+
+static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 _w )
+{
+    vmathSoaV4SetW(result, _w);
+}
+
+static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetW(&vec);
+}
+
+static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value )
+{
+    vmathSoaV4SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx )
+{
+    return vmathSoaV4GetElem(&vec, idx);
+}
+
+static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4MaxElem(&vec);
+}
+
+static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4MinElem(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4Sum(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    return vmathSoaV4Dot(&vec0, &vec1);
+}
+
+static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4LengthSqr(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4Length(&vec);
+}
+
+static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV4Print_V( VmathSoaVector4 vec )
+{
+    vmathSoaV4Print(&vec);
+}
+
+static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name )
+{
+    vmathSoaV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromAos(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFrom4Aos(&result, &pnt0, &pnt1, &pnt2, &pnt3);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Lerp(&result, t, &pnt0, &pnt1);
+    return result;
+}
+
+static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
+{
+    vmathSoaP3Get4Aos(&pnt, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
+{
+    vmathSoaP3LoadXYZArray(vec, threeQuads);
+}
+
+static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 vec, vec_float4 *threeQuads )
+{
+    vmathSoaP3StoreXYZArray(&vec, threeQuads);
+}
+
+static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads )
+{
+    vmathSoaP3StoreHalfFloats(&pnt0, &pnt1, threeQuads);
+}
+
+static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 _x )
+{
+    vmathSoaP3SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetX(&pnt);
+}
+
+static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 _y )
+{
+    vmathSoaP3SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetY(&pnt);
+}
+
+static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 _z )
+{
+    vmathSoaP3SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetZ(&pnt);
+}
+
+static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value )
+{
+    vmathSoaP3SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx )
+{
+    return vmathSoaP3GetElem(&pnt, idx);
+}
+
+static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaP3Sub(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3AddV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3SubV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MulPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3DivPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3RecipPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3SqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3RsqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3AbsPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3CopySignPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MaxPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3MaxElem(&pnt);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MinPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3MinElem(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3Sum(&pnt);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Scale(&result, &pnt, scaleVal);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3NonUniformScale(&result, &pnt, &scaleVec);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec )
+{
+    return vmathSoaP3Projection(&pnt, &unitVec);
+}
+
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3DistSqrFromOrigin(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3DistFromOrigin(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    return vmathSoaP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    return vmathSoaP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Select(&result, &pnt0, &pnt1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt )
+{
+    vmathSoaP3Print(&pnt);
+}
+
+static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name )
+{
+    vmathSoaP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos.h
index 119e2d292..6a6ccd285 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos.h
@@ -1,1960 +1,1960 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_PPU_H
-#define _VECTORMATH_AOS_C_PPU_H
-
-#include <math.h>
-#include <altivec.h>
-#include <simdmath.h>
-#include "vec_types.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    vec_float4 vec128;
-} VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    vec_float4 vec128;
-} VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    vec_float4 vec128;
-} VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    vec_float4 vec128;
-} VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
-
-/*
- * Set vector float data in a 3-D vector
- */
-static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D vector
- */
-static inline vec_float4 vmathV3Get128( const VmathVector3 *vec );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX( const VmathVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY( const VmathVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ( const VmathVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV3MakeXAxis( VmathVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV3MakeYAxis( VmathVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV3MakeZAxis( VmathVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem( const VmathVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem( const VmathVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum( const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr( const VmathVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length( const VmathVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- * NOTE: 
- * Slower than column post-multiply.
- */
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D vectors in three quadwords
- */
-static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D vectors as half-floats
- */
-static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print( const VmathVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
-
-/*
- * Set vector float data in a 4-D vector
- */
-static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 4-D vector
- */
-static inline vec_float4 vmathV4Get128( const VmathVector4 *vec );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX( const VmathVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY( const VmathVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ( const VmathVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW( const VmathVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV4MakeXAxis( VmathVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV4MakeYAxis( VmathVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV4MakeZAxis( VmathVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathV4MakeWAxis( VmathVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem( const VmathVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem( const VmathVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum( const VmathVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr( const VmathVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length( const VmathVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
-
-/*
- * Store four 4-D vectors as half-floats
- */
-static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print( const VmathVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
-
-/*
- * Set vector float data in a 3-D point
- */
-static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D point
- */
-static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX( const VmathPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY( const VmathPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ( const VmathPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem( const VmathPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum( const VmathPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D points in three quadwords
- */
-static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D points as half-floats
- */
-static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print( const VmathPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
-
-/*
- * Set vector float data in a quaternion
- */
-static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a quaternion
- */
-static inline vec_float4 vmathQGet128( const VmathQuat *quat );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX( const VmathQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY( const VmathQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ( const VmathQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW( const VmathQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem( const VmathQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathQMakeIdentity( VmathQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm( const VmathQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength( const VmathQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint( const VmathQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints( const VmathQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant( const VmathMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print( const VmathMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant( const VmathMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print( const VmathMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathT3MakeIdentity( VmathTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print( const VmathTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_PPU_H
+#define _VECTORMATH_AOS_C_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include <simdmath.h>
+#include "vec_types.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format.
+ * Storage is a single vec_float4 held in the member vec128. */
+typedef struct _VmathVector3
+{
+    vec_float4 vec128;
+} VmathVector3;
+
+/* A 4-D vector in array-of-structures format.
+ * Storage is a single vec_float4 held in the member vec128. */
+typedef struct _VmathVector4
+{
+    vec_float4 vec128;
+} VmathVector4;
+
+/* A 3-D point in array-of-structures format.
+ * Storage is a single vec_float4 held in the member vec128. */
+typedef struct _VmathPoint3
+{
+    vec_float4 vec128;
+} VmathPoint3;
+
+/* A quaternion in array-of-structures format.
+ * Storage is a single vec_float4 held in the member vec128. */
+typedef struct _VmathQuat
+{
+    vec_float4 vec128;
+} VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format.
+ * Stored as three VmathVector3 columns: col0, col1, col2. */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format.
+ * Stored as four VmathVector4 columns: col0, col1, col2, col3. */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format.
+ * Stored as four VmathVector3 columns: col0, col1, col2, col3. */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3;
+} VmathTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
+
+/*
+ * Set vector float data in a 3-D vector
+ */
+static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D vector
+ */
+static inline vec_float4 vmathV3Get128( const VmathVector3 *vec );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX( const VmathVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY( const VmathVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ( const VmathVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV3MakeXAxis( VmathVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV3MakeYAxis( VmathVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV3MakeZAxis( VmathVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem( const VmathVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem( const VmathVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum( const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr( const VmathVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length( const VmathVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ * NOTE: 
+ * Slower than column post-multiply.
+ */
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D vectors in three quadwords
+ */
+static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D vectors as half-floats in three quadwords
+ */
+static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print( const VmathVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
+
+/*
+ * Set vector float data in a 4-D vector
+ */
+static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 4-D vector
+ */
+static inline vec_float4 vmathV4Get128( const VmathVector4 *vec );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX( const VmathVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY( const VmathVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ( const VmathVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW( const VmathVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV4MakeXAxis( VmathVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV4MakeYAxis( VmathVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV4MakeZAxis( VmathVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathV4MakeWAxis( VmathVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem( const VmathVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem( const VmathVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum( const VmathVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr( const VmathVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length( const VmathVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
+
+/*
+ * Store four 4-D vectors as half-floats in two quadwords
+ */
+static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print( const VmathVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
+
+/*
+ * Set vector float data in a 3-D point
+ */
+static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D point
+ */
+static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX( const VmathPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY( const VmathPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ( const VmathPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point, yielding a 3-D vector
+ */
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point, yielding a 3-D point
+ */
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem( const VmathPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum( const VmathPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D points in three quadwords
+ */
+static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D points as half-floats in three quadwords
+ */
+static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print( const VmathPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
+
+/*
+ * Convert a 3x3 rotation matrix to a unit-length quaternion
+ */
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
+
+/*
+ * Set vector float data in a quaternion
+ */
+static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a quaternion
+ */
+static inline vec_float4 vmathQGet128( const VmathQuat *quat );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX( const VmathQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY( const VmathQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ( const VmathQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW( const VmathQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem( const VmathQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathQMakeIdentity( VmathQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
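+ * Compute the norm of a quaternion (NOTE(review): presumably the squared length, since vmathQLength is declared separately; confirm against the definition)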
+ */
+static inline float vmathQNorm( const VmathQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength( const VmathQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint( const VmathQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints( const VmathQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant( const VmathMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print( const VmathMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Construct a viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on the specified frustum
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix is affine, as this function expects.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix is affine with an orthogonal upper-left 3x3 submatrix, as this function expects.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print( const VmathMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x4 rotation transformation matrix from a unit-length quaternion
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix has an orthogonal upper-left 3x3 submatrix, as this function expects.
+ */
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print( const VmathTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos_v.h
index e92ce9101..a8599172b 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos_v.h
@@ -1,1925 +1,1925 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_V_PPU_H
-#define _VECTORMATH_AOS_C_V_PPU_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vec_types.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    vec_float4 vec128;
-} VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    vec_float4 vec128;
-} VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    vec_float4 vec128;
-} VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    vec_float4 vec128;
-} VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D vector
- */
-static inline vec_float4 vmathV3Get128_V( VmathVector3 vec );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX_V( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY_V( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX_V( VmathVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY_V( VmathVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ_V( VmathVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector3 vmathV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector3 vmathV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector3 vmathV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem_V( VmathVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem_V( VmathVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum_V( VmathVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr_V( VmathVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length_V( VmathVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- * NOTE: 
- * Slower than column post-multiply.
- */
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D vectors in three quadwords
- */
-static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D vectors as half-floats
- */
-static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print_V( VmathVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 4-D vector
- */
-static inline vec_float4 vmathV4Get128_V( VmathVector4 vec );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX_V( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY_V( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW_V( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX_V( VmathVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY_V( VmathVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ_V( VmathVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW_V( VmathVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector4 vmathV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector4 vmathV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector4 vmathV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathVector4 vmathV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem_V( VmathVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem_V( VmathVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum_V( VmathVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr_V( VmathVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length_V( VmathVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
-
-/*
- * Store four 4-D vectors as half-floats
- */
-static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print_V( VmathVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D point
- */
-static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX_V( VmathPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY_V( VmathPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ_V( VmathPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum_V( VmathPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D points in three quadwords
- */
-static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D points as half-floats
- */
-static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print_V( VmathPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a quaternion
- */
-static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a quaternion
- */
-static inline vec_float4 vmathQGet128_V( VmathQuat quat );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX_V( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY_V( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ_V( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW_V( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX_V( VmathQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY_V( VmathQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ_V( VmathQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW_V( VmathQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem_V( VmathQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathQuat vmathQNeg_V( VmathQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathQuat vmathQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathQuat vmathQMakeRotationX_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathQuat vmathQMakeRotationY_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathQuat vmathQMakeRotationZ_V( float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathQuat vmathQConj_V( VmathQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm_V( VmathQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength_V( VmathQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint_V( VmathQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints_V( VmathQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant_V( VmathMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print_V( VmathMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant_V( VmathMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print_V( VmathMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathTransform3 vmathT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print_V( VmathTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_aos.h"
-#include "vec_aos_v.h"
-#include "quat_aos_v.h"
-#include "mat_aos_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_V_PPU_H
+#define _VECTORMATH_AOS_C_V_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vec_types.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format
+ */
+typedef struct _VmathVector3
+{
+    vec_float4 vec128;
+} VmathVector3;
+
+/* A 4-D vector in array-of-structures format
+ */
+typedef struct _VmathVector4
+{
+    vec_float4 vec128;
+} VmathVector4;
+
+/* A 3-D point in array-of-structures format
+ */
+typedef struct _VmathPoint3
+{
+    vec_float4 vec128;
+} VmathPoint3;
+
+/* A quaternion in array-of-structures format
+ */
+typedef struct _VmathQuat
+{
+    vec_float4 vec128;
+} VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format
+ */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format
+ */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format
+ */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3;
+} VmathTransform3;
+
+#endif
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D vector
+ */
+static inline vec_float4 vmathV3Get128_V( VmathVector3 vec );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX_V( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY_V( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX_V( VmathVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY_V( VmathVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ_V( VmathVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
+
+/*
+ * Construct x axis
+ */
+static inline VmathVector3 vmathV3MakeXAxis_V( );
+
+/*
+ * Construct y axis
+ */
+static inline VmathVector3 vmathV3MakeYAxis_V( );
+
+/*
+ * Construct z axis
+ */
+static inline VmathVector3 vmathV3MakeZAxis_V( );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem_V( VmathVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem_V( VmathVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum_V( VmathVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr_V( VmathVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length_V( VmathVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ * NOTE: 
+ * Slower than column post-multiply.
+ */
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged
+ */
+static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D vectors in three quadwords
+ */
+static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D vectors as half-floats
+ */
+static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print_V( VmathVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 4-D vector
+ */
+static inline vec_float4 vmathV4Get128_V( VmathVector4 vec );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX_V( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY_V( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW_V( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX_V( VmathVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY_V( VmathVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ_V( VmathVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW_V( VmathVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
+
+/*
+ * Construct x axis
+ */
+static inline VmathVector4 vmathV4MakeXAxis_V( );
+
+/*
+ * Construct y axis
+ */
+static inline VmathVector4 vmathV4MakeYAxis_V( );
+
+/*
+ * Construct z axis
+ */
+static inline VmathVector4 vmathV4MakeZAxis_V( );
+
+/*
+ * Construct w axis
+ */
+static inline VmathVector4 vmathV4MakeWAxis_V( );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem_V( VmathVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem_V( VmathVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum_V( VmathVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr_V( VmathVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length_V( VmathVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
+
+/*
+ * Store four 4-D vectors as half-floats
+ */
+static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print_V( VmathVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D point
+ */
+static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX_V( VmathPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY_V( VmathPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ_V( VmathPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Add a 3-D point to a 3-D vector
+ */
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum_V( VmathPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged
+ */
+static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D points in three quadwords
+ */
+static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D points as half-floats
+ */
+static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print_V( VmathPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
+
+/*
+ * Convert a 3x3 rotation matrix to a unit-length quaternion
+ */
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a quaternion
+ */
+static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a quaternion
+ */
+static inline vec_float4 vmathQGet128_V( VmathQuat quat );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX_V( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY_V( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ_V( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW_V( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX_V( VmathQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY_V( VmathQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ_V( VmathQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW_V( VmathQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem_V( VmathQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathQuat vmathQNeg_V( VmathQuat quat );
+
+/*
+ * Construct an identity quaternion (takes no arguments)
+ */
+static inline VmathQuat vmathQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathQuat vmathQMakeRotationX_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathQuat vmathQMakeRotationY_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathQuat vmathQMakeRotationZ_V( float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathQuat vmathQConj_V( VmathQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion (NOTE(review): presumably the squared length, since vmathQLength_V is declared separately - confirm against the definition)
+ */
+static inline float vmathQNorm_V( VmathQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength_V( VmathQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint_V( VmathQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints_V( VmathQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (takes no arguments)
+ */
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant_V( VmathMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print_V( VmathMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix (takes no arguments)
+ */
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Construct a viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant_V( VmathMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print_V( VmathMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline VmathTransform3 vmathT3MakeIdentity_V( );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate using a unit-length quaternion
+ */
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print_V( VmathTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_aos.h"
+#include "vec_aos_v.h"
+#include "quat_aos_v.h"
+#include "mat_aos_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa.h
index f52b5aa9d..0188dcb3a 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa.h
@@ -1,2013 +1,2013 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_C_PPU_H
-#define _VECTORMATH_SOA_C_PPU_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vectormath_aos.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_SOA_C_TYPES_H
-#define _VECTORMATH_SOA_C_TYPES_H
-
-/* A set of four 3-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaVector3;
-
-/* A set of four 4-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector4
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaVector4;
-
-/* A set of four 3-D points in structure-of-arrays format
- */
-typedef struct _VmathSoaPoint3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaPoint3;
-
-/* A set of four quaternions in structure-of-arrays format
- */
-typedef struct _VmathSoaQuat
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaQuat;
-
-/* A set of four 3x3 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-} VmathSoaMatrix3;
-
-/* A set of four 4x4 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix4
-{
-    VmathSoaVector4 col0;
-    VmathSoaVector4 col1;
-    VmathSoaVector4 col2;
-    VmathSoaVector4 col3;
-} VmathSoaMatrix4;
-
-/* A set of four 3x4 transformation matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaTransform3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-    VmathSoaVector3 col3;
-} VmathSoaTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D vector
- */
-static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec );
-
-/*
- * Insert four AoS 3-D vectors
- */
-static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 );
-
-/*
- * Extract four AoS 3-D vectors
- */
-static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D vector in three quadwords
- */
-static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D vectors as half-floats
- */
-static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Print( const VmathSoaVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 4-D vector
- */
-static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec );
-
-/*
- * Insert four AoS 4-D vectors
- */
-static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 );
-
-/*
- * Extract four AoS 4-D vectors
- */
-static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 );
-
-/*
- * Store four slots of an SoA 4-D vector as half-floats
- */
-static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Print( const VmathSoaVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D point
- */
-static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Insert four AoS 3-D points
- */
-static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 );
-
-/*
- * Extract four AoS 3-D points
- */
-static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D point in three quadwords
- */
-static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *pnt, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D points as half-floats
- */
-static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS quaternion
- */
-static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat );
-
-/*
- * Insert four AoS quaternions
- */
-static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 );
-
-/*
- * Extract four AoS quaternions
- */
-static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrint( const VmathSoaQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x3 matrix
- */
-static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Insert four AoS 3x3 matrices
- */
-static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 );
-
-/*
- * Extract four AoS 3x3 matrices
- */
-static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0, const VmathSoaVector4 *col1, const VmathSoaVector4 *col2, const VmathSoaVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 4x4 matrix
- */
-static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Insert four AoS 4x4 matrices
- */
-static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 );
-
-/*
- * Extract four AoS 4x4 matrices
- */
-static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2, const VmathSoaVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x4 transformation matrix
- */
-static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Insert four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 );
-
-/*
- * Extract four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_soa.h"
-#include "quat_soa.h"
-#include "mat_soa.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_PPU_H
+#define _VECTORMATH_SOA_C_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vectormath_aos.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_SOA_C_TYPES_H
+#define _VECTORMATH_SOA_C_TYPES_H
+
+/* A set of four 3-D vectors in structure-of-arrays format:
+ * each member packs one component of all four vectors, one vector per slot. */
+typedef struct _VmathSoaVector3
+{
+    vec_float4 x;  /* x elements of the four vectors */
+    vec_float4 y;  /* y elements of the four vectors */
+    vec_float4 z;  /* z elements of the four vectors */
+} VmathSoaVector3;
+
+/* A set of four 4-D vectors in structure-of-arrays format:
+ * each member packs one component of all four vectors, one vector per slot. */
+typedef struct _VmathSoaVector4
+{
+    vec_float4 x;  /* x elements of the four vectors */
+    vec_float4 y;  /* y elements of the four vectors */
+    vec_float4 z;  /* z elements of the four vectors */
+    vec_float4 w;  /* w elements of the four vectors */
+} VmathSoaVector4;
+
+/* A set of four 3-D points in structure-of-arrays format:
+ * each member packs one coordinate of all four points, one point per slot. */
+typedef struct _VmathSoaPoint3
+{
+    vec_float4 x;  /* x elements of the four points */
+    vec_float4 y;  /* y elements of the four points */
+    vec_float4 z;  /* z elements of the four points */
+} VmathSoaPoint3;
+
+/* A set of four quaternions in structure-of-arrays format:
+ * each member packs one component of all four quaternions, one per slot. */
+typedef struct _VmathSoaQuat
+{
+    vec_float4 x;  /* x elements of the four quaternions */
+    vec_float4 y;  /* y elements of the four quaternions */
+    vec_float4 z;  /* z elements of the four quaternions */
+    vec_float4 w;  /* w elements of the four quaternions */
+} VmathSoaQuat;
+
+/* A set of four 3x3 matrices in structure-of-arrays format,
+ * stored as three columns; each column is itself an SoA 3-D vector. */
+typedef struct _VmathSoaMatrix3
+{
+    VmathSoaVector3 col0;  /* column 0 of the four matrices */
+    VmathSoaVector3 col1;  /* column 1 of the four matrices */
+    VmathSoaVector3 col2;  /* column 2 of the four matrices */
+} VmathSoaMatrix3;
+
+/* A set of four 4x4 matrices in structure-of-arrays format,
+ * stored as four columns; each column is itself an SoA 4-D vector. */
+typedef struct _VmathSoaMatrix4
+{
+    VmathSoaVector4 col0;  /* column 0 of the four matrices */
+    VmathSoaVector4 col1;  /* column 1 of the four matrices */
+    VmathSoaVector4 col2;  /* column 2 of the four matrices */
+    VmathSoaVector4 col3;  /* column 3 of the four matrices */
+} VmathSoaMatrix4;
+
+/* A set of four 3x4 transformation matrices in structure-of-arrays format,
+ * stored as four columns; each column is itself an SoA 3-D vector. */
+typedef struct _VmathSoaTransform3
+{
+    VmathSoaVector3 col0;  /* column 0 of the four transforms */
+    VmathSoaVector3 col1;  /* column 1 of the four transforms */
+    VmathSoaVector3 col2;  /* column 2 of the four transforms */
+    VmathSoaVector3 col3;  /* column 3 of the four transforms; presumably the translation part - confirm in mat_soa.h */
+} VmathSoaTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D vector
+ */
+static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Insert four AoS 3-D vectors
+ */
+static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 );
+
+/*
+ * Extract four AoS 3-D vectors
+ */
+static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D vector in three quadwords
+ */
+static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D vectors as half-floats
+ */
+static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Print( const VmathSoaVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4-D vector
+ */
+static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Insert four AoS 4-D vectors
+ */
+static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 );
+
+/*
+ * Extract four AoS 4-D vectors
+ */
+static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 );
+
+/*
+ * Store four slots of an SoA 4-D vector as half-floats
+ */
+static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Print( const VmathSoaVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D point
+ */
+static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Insert four AoS 3-D points
+ */
+static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 );
+
+/*
+ * Extract four AoS 3-D points
+ */
+static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Add a 3-D point to a 3-D vector
+ */
+static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D point in three quadwords
+ */
+static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *pnt, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D points as half-floats in three quadwords
+ */
+static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec );
+
+/*
+ * Convert a 3x3 rotation matrix to a unit-length quaternion
+ */
+static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS quaternion
+ */
+static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat );
+
+/*
+ * Insert four AoS quaternions
+ */
+static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 );
+
+/*
+ * Extract four AoS quaternions
+ */
+static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion (NOTE(review): "norm" may denote the squared length here, as distinct from vmathSoaQLength; confirm against the definition)
+ */
+static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation using four unit-length quaternions
+ */
+static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrint( const VmathSoaQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x3 matrix
+ */
+static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Insert four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 );
+
+/*
+ * Extract four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0, const VmathSoaVector4 *col1, const VmathSoaVector4 *col2, const VmathSoaVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4x4 matrix
+ */
+static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Insert four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 );
+
+/*
+ * Extract four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE:
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE:
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2, const VmathSoaVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x4 transformation matrix
+ */
+static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Insert four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 );
+
+/*
+ * Extract four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+ */
+static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_soa.h"
+#include "quat_soa.h"
+#include "mat_soa.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa_v.h
index 03fd011e9..7a93c1757 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa_v.h
@@ -1,1979 +1,1979 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_C_V_PPU_H
-#define _VECTORMATH_SOA_C_V_PPU_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vectormath_aos_v.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_SOA_C_TYPES_H
-#define _VECTORMATH_SOA_C_TYPES_H
-
-/* A set of four 3-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaVector3;
-
-/* A set of four 4-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector4
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaVector4;
-
-/* A set of four 3-D points in structure-of-arrays format
- */
-typedef struct _VmathSoaPoint3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaPoint3;
-
-/* A set of four quaternions in structure-of-arrays format
- */
-typedef struct _VmathSoaQuat
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaQuat;
-
-/* A set of four 3x3 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-} VmathSoaMatrix3;
-
-/* A set of four 4x4 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix4
-{
-    VmathSoaVector4 col0;
-    VmathSoaVector4 col1;
-    VmathSoaVector4 col2;
-    VmathSoaVector4 col3;
-} VmathSoaMatrix4;
-
-/* A set of four 3x4 transformation matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaTransform3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-    VmathSoaVector3 col3;
-} VmathSoaTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec );
-
-/*
- * Insert four AoS 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 );
-
-/*
- * Extract four AoS 3-D vectors
- */
-static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D vector in three quadwords
- */
-static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D vectors as half-floats
- */
-static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Print_V( VmathSoaVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec );
-
-/*
- * Insert four AoS 4-D vectors
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 );
-
-/*
- * Extract four AoS 4-D vectors
- */
-static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 );
-
-/*
- * Store four slots of an SoA 4-D vector as half-floats
- */
-static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Print_V( VmathSoaVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt );
-
-/*
- * Insert four AoS 3-D points
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 );
-
-/*
- * Extract four AoS 3-D points
- */
-static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D point in three quadwords
- */
-static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 pnt, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D points as half-floats
- */
-static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat );
-
-/*
- * Insert four AoS quaternions
- */
-static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 );
-
-/*
- * Extract four AoS quaternions
- */
-static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat unitQuat, VmathSoaVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrint_V( VmathSoaQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat );
-
-/*
- * Insert four AoS 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 );
-
-/*
- * Extract four AoS 3x3 matrices
- */
-static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 col0, VmathSoaVector4 col1, VmathSoaVector4 col2, VmathSoaVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat );
-
-/*
- * Insert four AoS 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 );
-
-/*
- * Extract four AoS 4x4 matrices
- */
-static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2, VmathSoaVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x4 transformation matrix
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm );
-
-/*
- * Insert four AoS 3x4 transformation matrices
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 );
-
-/*
- * Extract four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_soa.h"
-#include "vec_soa_v.h"
-#include "quat_soa_v.h"
-#include "mat_soa_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_V_PPU_H
+#define _VECTORMATH_SOA_C_V_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vectormath_aos_v.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_SOA_C_TYPES_H
+#define _VECTORMATH_SOA_C_TYPES_H
+
+/* A set of four 3-D vectors in structure-of-arrays format:
+ * each member holds one component (x, y, or z) for all four vectors. */
+typedef struct _VmathSoaVector3
+{
+    vec_float4 x;   /* x components of the four vectors */
+    vec_float4 y;   /* y components of the four vectors */
+    vec_float4 z;   /* z components of the four vectors */
+} VmathSoaVector3;
+
+/* A set of four 4-D vectors in structure-of-arrays format:
+ * each member holds one component (x, y, z, or w) for all four vectors. */
+typedef struct _VmathSoaVector4
+{
+    vec_float4 x;   /* x components of the four vectors */
+    vec_float4 y;   /* y components of the four vectors */
+    vec_float4 z;   /* z components of the four vectors */
+    vec_float4 w;   /* w components of the four vectors */
+} VmathSoaVector4;
+
+/* A set of four 3-D points in structure-of-arrays format:
+ * each member holds one coordinate (x, y, or z) for all four points. */
+typedef struct _VmathSoaPoint3
+{
+    vec_float4 x;   /* x coordinates of the four points */
+    vec_float4 y;   /* y coordinates of the four points */
+    vec_float4 z;   /* z coordinates of the four points */
+} VmathSoaPoint3;
+
+/* A set of four quaternions in structure-of-arrays format:
+ * each member holds one element (x, y, z, or w) for all four quaternions. */
+typedef struct _VmathSoaQuat
+{
+    vec_float4 x;   /* x elements of the four quaternions */
+    vec_float4 y;   /* y elements of the four quaternions */
+    vec_float4 z;   /* z elements of the four quaternions */
+    vec_float4 w;   /* w elements of the four quaternions */
+} VmathSoaQuat;
+
+/* A set of four 3x3 matrices in structure-of-arrays format,
+ * stored as three columns; each column is itself an SoA 3-D vector. */
+typedef struct _VmathSoaMatrix3
+{
+    VmathSoaVector3 col0;   /* column 0 of each of the four matrices */
+    VmathSoaVector3 col1;   /* column 1 of each of the four matrices */
+    VmathSoaVector3 col2;   /* column 2 of each of the four matrices */
+} VmathSoaMatrix3;
+
+/* A set of four 4x4 matrices in structure-of-arrays format,
+ * stored as four columns; each column is itself an SoA 4-D vector. */
+typedef struct _VmathSoaMatrix4
+{
+    VmathSoaVector4 col0;   /* column 0 of each of the four matrices */
+    VmathSoaVector4 col1;   /* column 1 of each of the four matrices */
+    VmathSoaVector4 col2;   /* column 2 of each of the four matrices */
+    VmathSoaVector4 col3;   /* column 3 of each of the four matrices */
+} VmathSoaMatrix4;
+
+/* A set of four 3x4 transformation matrices in structure-of-arrays format,
+ * stored as four columns; each column is itself an SoA 3-D vector. */
+typedef struct _VmathSoaTransform3
+{
+    VmathSoaVector3 col0;   /* column 0 of each of the four transforms */
+    VmathSoaVector3 col1;   /* column 1 of each of the four transforms */
+    VmathSoaVector3 col2;   /* column 2 of each of the four transforms */
+    VmathSoaVector3 col3;   /* column 3; presumably the translation -- confirm in mat_soa_v.h */
+} VmathSoaTransform3;
+
+#endif
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec );
+
+/*
+ * Insert four AoS 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 );
+
+/*
+ * Extract four AoS 3-D vectors
+ */
+static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec );
+
+/*
+ * Construct x axis (takes no arguments; "void" makes this a real C prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( void );
+
+/*
+ * Construct y axis (takes no arguments; "void" makes this a real C prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( void );
+
+/*
+ * Construct z axis (takes no arguments; "void" makes this a real C prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( void );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D vector in three quadwords
+ */
+static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D vectors as half-floats
+ */
+static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Print_V( VmathSoaVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec );
+
+/*
+ * Insert four AoS 4-D vectors
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 );
+
+/*
+ * Extract four AoS 4-D vectors
+ */
+static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec );
+
+/*
+ * Construct x axis (takes no arguments; "void" makes this a real C prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( void );
+
+/*
+ * Construct y axis (takes no arguments; "void" makes this a real C prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( void );
+
+/*
+ * Construct z axis (takes no arguments; "void" makes this a real C prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( void );
+
+/*
+ * Construct w axis (takes no arguments; "void" makes this a real C prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 );
+
+/*
+ * Store four slots of an SoA 4-D vector as half-floats
+ */
+static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Print_V( VmathSoaVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt );
+
+/*
+ * Insert four AoS 3-D points
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 );
+
+/*
+ * Extract four AoS 3-D points
+ */
+static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D point in three quadwords
+ */
+static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 pnt, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D points as half-floats
+ */
+static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat );
+
+/*
+ * Insert four AoS quaternions
+ */
+static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 );
+
+/*
+ * Extract four AoS quaternions
+ */
+static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat );
+
+/*
+ * Construct an identity quaternion (takes no arguments; declared with a full prototype)
+ */
+static inline VmathSoaQuat vmathSoaQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat unitQuat, VmathSoaVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion
+ */
+static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrint_V( VmathSoaQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat );
+
+/*
+ * Insert four AoS 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 );
+
+/*
+ * Extract four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (takes no arguments; declared with a full prototype)
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 col0, VmathSoaVector4 col1, VmathSoaVector4 col2, VmathSoaVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat );
+
+/*
+ * Insert four AoS 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 );
+
+/*
+ * Extract four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix (takes no arguments; declared with a full prototype)
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2, VmathSoaVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x4 transformation matrix
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm );
+
+/*
+ * Insert four AoS 3x4 transformation matrices
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 );
+
+/*
+ * Extract four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix (takes no arguments; declared with a full prototype)
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform the rotation described by a unit-length quaternion
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_soa.h"
+#include "vec_soa_v.h"
+#include "quat_soa_v.h"
+#include "mat_soa_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/boolInVec.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/boolInVec.h
index dc678b803..351a6f67c 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/boolInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/boolInVec.h
@@ -1,261 +1,261 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _BOOLINVEC_H
-#define _BOOLINVEC_H
-
-#include <math.h>
-#include <altivec.h>
-#include "../c/vec_types.h"
-#undef bool
-
-namespace Vectormath {
-
-class floatInVec;
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec class
-//
-
-class boolInVec
-{
-    private:
-        vec_uint4 mData;
-
-        inline boolInVec(vec_uint4 vec);
-    public:
-        inline boolInVec() {}
-
-        // matches standard type conversions
-        //
-        inline boolInVec(floatInVec vec);
-
-        // explicit cast from bool
-        //
-        explicit inline boolInVec(bool scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to bool
-        // 
-        inline bool getAsBool() const;
-#else
-        // implicit cast to bool
-        // 
-        inline operator bool() const;
-#endif
-        
-        // get vector data
-        // bool value is splatted across all word slots of vector as 0 (false) or -1 (true)
-        //
-        inline vec_uint4 get128() const;
-
-        // operators
-        //
-        inline const boolInVec operator ! () const;
-        inline boolInVec& operator = (boolInVec vec);
-        inline boolInVec& operator &= (boolInVec vec);
-        inline boolInVec& operator ^= (boolInVec vec);
-        inline boolInVec& operator |= (boolInVec vec);
-
-        // friend functions
-        //
-        friend inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec functions
-//
-
-// operators
-//
-inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec implementation
-//
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-
-inline
-boolInVec::boolInVec(vec_uint4 vec)
-{
-    mData = vec;
-}
-
-inline
-boolInVec::boolInVec(floatInVec vec)
-{
-    *this = (vec != floatInVec(0.0f));
-}
-
-inline
-boolInVec::boolInVec(bool scalar)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(scalar))
-    {
-        const unsigned int mask = -(int)scalar;
-        mData = (vec_uint4){mask, mask, mask, mask};
-    }
-    else
-#endif
-    {
-        unsigned int mask = -(int)scalar;
-        vec_uint4 vec = vec_ld(0, &mask);
-        mData = vec_splat(vec_perm(vec, vec, vec_lvsl(0, &mask)), 0);
-    }
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-bool
-boolInVec::getAsBool() const
-#else
-inline
-boolInVec::operator bool() const
-#endif
-{
-    return vec_all_gt(mData, ((vec_uint4){0,0,0,0}));
-}
-
-inline
-vec_uint4
-boolInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const boolInVec
-boolInVec::operator ! () const
-{
-    return boolInVec(vec_nor(mData, mData));
-}
-
-inline
-boolInVec&
-boolInVec::operator = (boolInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator &= (boolInVec vec)
-{
-    *this = *this & vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator ^= (boolInVec vec)
-{
-    *this = *this ^ vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator |= (boolInVec vec)
-{
-    *this = *this | vec;
-    return *this;
-}
-
-inline
-const boolInVec
-operator == (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec((vec_uint4)vec_cmpeq(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (boolInVec vec0, boolInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-    
-inline
-const boolInVec
-operator & (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec_and(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator | (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec_or(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator ^ (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec_xor(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
-{
-    return boolInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
- 
-} // namespace Vectormath
-
-#endif // boolInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+#include <altivec.h>
+#include "../c/vec_types.h"
+#undef bool
+
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec
+{
+    private:
+        vec_uint4 mData;  // mask vector; see get128() for the documented slot contents
+
+        inline boolInVec(vec_uint4 vec);  // stores vec unchanged; used by the friend functions below
+    public:
+        inline boolInVec() {}  // leaves mData uninitialized
+
+        // matches standard type conversions:
+        // the result is the outcome of comparing vec against floatInVec(0.0f) with operator !=
+        inline boolInVec(floatInVec vec);
+
+        // explicit cast from bool
+        // the scalar is turned into a 0 / all-ones word and replicated into every word slot
+        explicit inline boolInVec(bool scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to bool
+        // (replaces the implicit conversion operator when _VECTORMATH_NO_SCALAR_CAST is defined)
+        inline bool getAsBool() const;
+#else
+        // implicit cast to bool
+        // (default; define _VECTORMATH_NO_SCALAR_CAST to get getAsBool() instead)
+        inline operator bool() const;
+#endif
+        
+        // get vector data
+        // bool value is splatted across all word slots of vector as 0 (false) or -1 (true)
+        // returns mData as stored
+        inline vec_uint4 get128() const;
+
+        // operators
+        // the compound assignments are implemented via the matching binary friend operator
+        inline const boolInVec operator ! () const;
+        inline boolInVec& operator = (boolInVec vec);
+        inline boolInVec& operator &= (boolInVec vec);
+        inline boolInVec& operator ^= (boolInVec vec);
+        inline boolInVec& operator |= (boolInVec vec);
+
+        // friend functions
+        // befriended so they can build results through the private vec_uint4 constructor
+        friend inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec functions
+//
+
+// operators
+//
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+// Private constructor: adopt the given mask vector exactly as passed in.
+inline boolInVec::boolInVec(vec_uint4 vec)
+    : mData(vec)
+{
+}
+
+// Build from a floatInVec: the stored mask is the result of (vec != floatInVec(0.0f)).
+inline boolInVec::boolInVec(floatInVec vec)
+{
+    mData = (vec != floatInVec(0.0f)).mData;
+}
+
+inline
+boolInVec::boolInVec(bool scalar)  // replicate a scalar bool into all four word slots as a mask
+{
+#ifdef __GNUC__
+    if (__builtin_constant_p(scalar))  // compile-time constant: build the vector from a literal
+    {
+        const unsigned int mask = -(int)scalar;  // false -> 0, true -> all bits set
+        mData = (vec_uint4){mask, mask, mask, mask};
+    }
+    else  // without __GNUC__ the block below is the only path
+#endif
+    {
+        unsigned int mask = -(int)scalar;  // false -> 0, true -> all bits set
+        vec_uint4 vec = vec_ld(0, &mask);  // load the quadword that contains mask
+        mData = vec_splat(vec_perm(vec, vec, vec_lvsl(0, &mask)), 0);  // rotate mask into slot 0, then splat it
+    }
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+bool
+boolInVec::getAsBool() const  // explicit accessor variant
+#else
+inline
+boolInVec::operator bool() const  // implicit conversion variant
+#endif
+{
+    return vec_all_gt(mData, ((vec_uint4){0,0,0,0}));  // true only when every word slot is greater than 0 (unsigned)
+}
+
+// Accessor for the raw 128-bit mask vector held by this object.
+inline vec_uint4
+boolInVec::get128() const
+{
+    return this->mData;
+}
+
+// Logical NOT: vec_nor of the mask with itself flips every bit.
+inline const boolInVec boolInVec::operator ! () const
+{
+    const vec_uint4 inverted = vec_nor(mData, mData);
+    return boolInVec(inverted);
+}
+
+// Copy-assign the mask vector from vec; returns *this so assignments can be chained.
+inline boolInVec&
+boolInVec::operator = (boolInVec vec)
+{
+    this->mData = vec.get128();
+    return *this;
+}
+
+// In-place AND: combine this mask with vec via operator & and store the result.
+inline boolInVec&
+boolInVec::operator &= (boolInVec vec)
+{
+    const boolInVec combined = *this & vec;
+    return (*this = combined);
+}
+
+// In-place XOR: combine this mask with vec via operator ^ and store the result.
+inline boolInVec&
+boolInVec::operator ^= (boolInVec vec)
+{
+    const boolInVec combined = *this ^ vec;
+    return (*this = combined);
+}
+
+// In-place OR: combine this mask with vec via operator | and store the result.
+inline boolInVec&
+boolInVec::operator |= (boolInVec vec)
+{
+    const boolInVec combined = *this | vec;
+    return (*this = combined);
+}
+
+// Equality: per-word vec_cmpeq of the two masks, with the result cast to vec_uint4.
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1)
+{
+    const vec_uint4 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec((vec_uint4)vec_cmpeq(lhs, rhs));
+}
+
+// Inequality: the logical negation of operator ==.
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1)
+{
+    const boolInVec same = (vec0 == vec1);
+    return !same;
+}
+    
+// AND of the two mask vectors (vec_and).
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1)
+{
+    const vec_uint4 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(vec_and(lhs, rhs));
+}
+
+// OR of the two mask vectors (vec_or).
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1)
+{
+    const vec_uint4 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(vec_or(lhs, rhs));
+}
+
+// XOR of the two mask vectors (vec_xor).
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1)
+{
+    const vec_uint4 lhs = vec0.get128(), rhs = vec1.get128();
+    return boolInVec(vec_xor(lhs, rhs));
+}
+
+// Choose between vec0 and vec1 with vec_sel, using select_vec1's mask as the selector.
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
+{
+    const vec_uint4 chooser = select_vec1.get128();
+    return boolInVec(vec_sel(vec0.get128(), vec1.get128(), chooser));
+}
+ 
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/floatInVec.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/floatInVec.h
index 22b549b76..957971900 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/floatInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/floatInVec.h
@@ -1,361 +1,361 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _FLOATINVEC_H
-#define _FLOATINVEC_H
-
-#include <math.h>
-#include <altivec.h>
-#include <stddef.h>
-#include <simdmath.h>
-#include "../c/vec_types.h"
-#undef bool
-
-namespace Vectormath {
-
-class boolInVec;
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec class
-//
-
-class floatInVec
-{
-    private:
-        vec_float4 mData;
-
-        inline floatInVec(vec_float4 vec);
-    public:
-        inline floatInVec() {}
-
-        // matches standard type conversions
-        //
-        inline floatInVec(boolInVec vec);
-
-        // construct from a slot of vec_float4
-        //
-        inline floatInVec(vec_float4 vec, int slot);
-        
-        // explicit cast from float
-        //
-        explicit inline floatInVec(float scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to float
-        // 
-        inline float getAsFloat() const;
-#else
-        // implicit cast to float
-        //
-        inline operator float() const;
-#endif
-
-        // get vector data
-        // float value is splatted across all word slots of vector
-        //
-        inline vec_float4 get128() const;
-
-        // operators
-        // 
-        inline const floatInVec operator ++ (int);
-        inline const floatInVec operator -- (int);
-        inline floatInVec& operator ++ ();
-        inline floatInVec& operator -- ();
-        inline const floatInVec operator - () const;
-        inline floatInVec& operator = (floatInVec vec);
-        inline floatInVec& operator *= (floatInVec vec);
-        inline floatInVec& operator /= (floatInVec vec);
-        inline floatInVec& operator += (floatInVec vec);
-        inline floatInVec& operator -= (floatInVec vec);
-
-        // friend functions
-        //
-        friend inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec functions
-//
-
-// operators
-// 
-inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec implementation
-//
-
-#include "boolInVec.h"
-
-namespace Vectormath {
-
-inline
-floatInVec::floatInVec(vec_float4 vec)
-{
-    mData = vec;
-}
-
-inline
-floatInVec::floatInVec(boolInVec vec)
-{
-    mData = vec_ctf(vec_sub((vec_uint4){0,0,0,0}, vec.get128()), 0);
-}
-
-inline
-floatInVec::floatInVec(vec_float4 vec, int slot)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(slot))
-    {
-        mData = vec_splat(vec, slot);
-    }
-    else
-#endif
-    {
-        const vec_uchar16 shiftpattern = vec_lvsl(0, (float *)(size_t)(slot << 2));
-        mData = vec_splat(vec_perm(vec, vec, shiftpattern), 0);
-    }
-}
-
-inline
-floatInVec::floatInVec(float scalar)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(scalar))
-    {
-        mData = (vec_float4){scalar, scalar, scalar, scalar};
-    }
-    else
-#endif
-    {
-        vec_float4 vec = vec_ld(0, &scalar);
-        mData = vec_splat(vec_perm(vec, vec, vec_lvsl(0, &scalar)), 0);
-    }
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-float
-floatInVec::getAsFloat() const
-#else
-inline
-floatInVec::operator float() const
-#endif
-{
-    return *((float *)&mData);
-}
-
-inline
-vec_float4
-floatInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const floatInVec
-floatInVec::operator ++ (int)
-{
-    vec_float4 olddata = mData;
-    operator ++();
-    return floatInVec(olddata);
-}
-
-inline
-const floatInVec
-floatInVec::operator -- (int)
-{
-    vec_float4 olddata = mData;
-    operator --();
-    return floatInVec(olddata);
-}
-
-inline
-floatInVec&
-floatInVec::operator ++ ()
-{
-    *this += floatInVec((vec_float4){1.0f,1.0f,1.0f,1.0f});
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -- ()
-{
-    *this -= floatInVec((vec_float4){1.0f,1.0f,1.0f,1.0f});
-    return *this;
-}
-
-inline
-const floatInVec
-floatInVec::operator - () const
-{
-    return floatInVec((vec_float4)vec_xor((vec_uint4)mData, (vec_uint4){0x80000000,0x80000000,0x80000000,0x80000000}));
-}
-
-inline
-floatInVec&
-floatInVec::operator = (floatInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator *= (floatInVec vec)
-{
-    *this = *this * vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator /= (floatInVec vec)
-{
-    *this = *this / vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator += (floatInVec vec)
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -= (floatInVec vec)
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline
-const floatInVec
-operator * (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec_madd(vec0.get128(), vec1.get128(), (vec_float4){0,0,0,0}));
-}
-
-inline
-const floatInVec
-operator / (floatInVec num, floatInVec den)
-{
-    return floatInVec(divf4(num.get128(), den.get128()));
-}
-
-inline
-const floatInVec
-operator + (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec_add(vec0.get128(), vec1.get128()));
-}
-
-inline
-const floatInVec
-operator - (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec_sub(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator < (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec((vec_uint4)vec_cmpgt(vec1.get128(), vec0.get128()));
-}
-
-inline
-const boolInVec
-operator <= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 > vec1);
-}
-
-inline
-const boolInVec
-operator > (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec((vec_uint4)vec_cmpgt(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator >= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 < vec1);
-}
-
-inline
-const boolInVec
-operator == (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec((vec_uint4)vec_cmpeq(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-    
-inline
-const floatInVec
-select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
-{
-    return floatInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
-
-} // namespace Vectormath
-
-#endif // floatInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+#include <altivec.h>
+#include <stddef.h>
+#include <simdmath.h>
+#include "../c/vec_types.h"
+#undef bool
+
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+class floatInVec  // a float value carried in an AltiVec vec_float4
+{
+    private:
+        vec_float4 mData;  // the wrapped vector; exposed read-only through get128()
+
+        inline floatInVec(vec_float4 vec);  // stores vec unchanged
+    public:
+        inline floatInVec() {}  // leaves mData uninitialized
+
+        // matches standard type conversions
+        // (computed as vec_ctf(0 - vec.get128(), 0))
+        inline floatInVec(boolInVec vec);
+
+        // construct from a slot of vec_float4
+        // (the selected word is replicated across all four word slots)
+        inline floatInVec(vec_float4 vec, int slot);
+        
+        // explicit cast from float
+        // (scalar is replicated across all four word slots)
+        explicit inline floatInVec(float scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to float
+        // (only declared when _VECTORMATH_NO_SCALAR_CAST is defined)
+        inline float getAsFloat() const;
+#else
+        // implicit cast to float
+        // (declared unless _VECTORMATH_NO_SCALAR_CAST is defined)
+        inline operator float() const;
+#endif
+
+        // get vector data
+        // float value is splatted across all word slots of vector
+        //
+        inline vec_float4 get128() const;
+
+        // operators
+        // (postfix ++/-- return the previous value by value; prefix forms return *this)
+        inline const floatInVec operator ++ (int);
+        inline const floatInVec operator -- (int);
+        inline floatInVec& operator ++ ();
+        inline floatInVec& operator -- ();
+        inline const floatInVec operator - () const;
+        inline floatInVec& operator = (floatInVec vec);
+        inline floatInVec& operator *= (floatInVec vec);
+        inline floatInVec& operator /= (floatInVec vec);
+        inline floatInVec& operator += (floatInVec vec);
+        inline floatInVec& operator -= (floatInVec vec);
+
+        // friend functions
+        // (each one builds its result through the private vec_float4 constructor)
+        friend inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec functions
+//
+
+// operators
+// 
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+// Private constructor: stores the given vector unchanged.
+inline floatInVec::floatInVec(vec_float4 vec)
+    : mData(vec)
+{
+}
+
+inline floatInVec::floatInVec(boolInVec vec)
+{
+    const vec_uint4 zero = (vec_uint4){0,0,0,0};
+    mData = vec_ctf(vec_sub(zero, vec.get128()), 0);  // (0 - mask) as float; presumably all-ones -> 1.0f, confirm in boolInVec.h
+}
+
+inline  // Builds a floatInVec by replicating word `slot` of vec into every word slot.
+floatInVec::floatInVec(vec_float4 vec, int slot)
+{
+#ifdef __GNUC__
+    if (__builtin_constant_p(slot))  // slot known at compile time: hand it straight to vec_splat
+    {
+        mData = vec_splat(vec, slot);
+    }
+    else
+#endif
+    {   // runtime slot (and the only path when __GNUC__ is not defined)
+        const vec_uchar16 shiftpattern = vec_lvsl(0, (float *)(size_t)(slot << 2));  // permute pattern for byte offset slot*4; the fake pointer is never dereferenced
+        mData = vec_splat(vec_perm(vec, vec, shiftpattern), 0);  // rotate word `slot` into word 0, then replicate word 0
+    }
+}
+
+inline  // Builds a floatInVec holding `scalar` in all four word slots.
+floatInVec::floatInVec(float scalar)
+{
+#ifdef __GNUC__
+    if (__builtin_constant_p(scalar))  // compile-time constant: write the vector literal directly
+    {
+        mData = (vec_float4){scalar, scalar, scalar, scalar};
+    }
+    else
+#endif
+    {   // runtime value (and the only path when __GNUC__ is not defined)
+        vec_float4 vec = vec_ld(0, &scalar);  // vec_ld loads the aligned 16-byte block that contains scalar
+        mData = vec_splat(vec_perm(vec, vec, vec_lvsl(0, &scalar)), 0);  // rotate scalar into word 0, then replicate word 0
+    }
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+float
+floatInVec::getAsFloat() const
+#else
+inline
+floatInVec::operator float() const
+#endif
+{   // one shared body: the macro only selects which signature is compiled
+    return *((float *)&mData);  // reads the first float stored in mData
+}
+
+// Accessor for the wrapped vector, returned by value.
+inline vec_float4
+floatInVec::get128() const
+{
+    return this->mData;
+}
+
+// Postfix increment: increments *this and returns the value held beforehand.
+inline const floatInVec
+floatInVec::operator ++ (int)
+{
+    const floatInVec previous(mData);
+    ++(*this);
+    return previous;
+}
+
+// Postfix decrement: decrements *this and returns the value held beforehand.
+inline const floatInVec
+floatInVec::operator -- (int)
+{
+    const floatInVec previous(mData);
+    --(*this);
+    return previous;
+}
+
+// Prefix increment: adds 1.0f in every word slot and returns *this.
+inline floatInVec&
+floatInVec::operator ++ ()
+{
+    const floatInVec one((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    return *this += one;
+}
+
+// Prefix decrement: subtracts 1.0f in every word slot and returns *this.
+inline floatInVec&
+floatInVec::operator -- ()
+{
+    const floatInVec one((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    return *this -= one;
+}
+
+inline const floatInVec
+floatInVec::operator - () const
+{
+    const vec_uint4 signBits = (vec_uint4){0x80000000,0x80000000,0x80000000,0x80000000};
+    return floatInVec((vec_float4)vec_xor((vec_uint4)mData, signBits));  // flip the top bit of every word
+}
+
+// Assignment: takes over the vector held by vec and returns *this.
+inline floatInVec&
+floatInVec::operator = (floatInVec vec)
+{
+    mData = vec.get128();
+    return *this;
+}
+
+// In-place multiply via the binary operator *; returns *this.
+inline floatInVec&
+floatInVec::operator *= (floatInVec vec)
+{
+    const floatInVec product = *this * vec;
+    return *this = product;
+}
+
+// In-place divide via the binary operator /; returns *this.
+inline floatInVec&
+floatInVec::operator /= (floatInVec vec)
+{
+    const floatInVec quotient = *this / vec;
+    return *this = quotient;
+}
+
+// In-place add via the binary operator +; returns *this.
+inline floatInVec&
+floatInVec::operator += (floatInVec vec)
+{
+    const floatInVec sum = *this + vec;
+    return *this = sum;
+}
+
+// In-place subtract via the binary operator -; returns *this.
+inline floatInVec&
+floatInVec::operator -= (floatInVec vec)
+{
+    const floatInVec difference = *this - vec;
+    return *this = difference;
+}
+
+inline const floatInVec
+operator * (floatInVec vec0, floatInVec vec1)
+{
+    const vec_float4 zero = (vec_float4){0,0,0,0};
+    return floatInVec(vec_madd(vec0.get128(), vec1.get128(), zero));  // multiply-add with a zero addend
+}
+
+inline const floatInVec
+operator / (floatInVec num, floatInVec den)
+{
+    const vec_float4 quotient = divf4(num.get128(), den.get128());
+    return floatInVec(quotient);
+}
+
+inline const floatInVec
+operator + (floatInVec vec0, floatInVec vec1)
+{
+    const vec_float4 sum = vec_add(vec0.get128(), vec1.get128());
+    return floatInVec(sum);
+}
+
+inline const floatInVec
+operator - (floatInVec vec0, floatInVec vec1)
+{
+    const vec_float4 difference = vec_sub(vec0.get128(), vec1.get128());
+    return floatInVec(difference);
+}
+
+inline const boolInVec
+operator < (floatInVec vec0, floatInVec vec1)
+{
+    const vec_uint4 mask = (vec_uint4)vec_cmpgt(vec1.get128(), vec0.get128());  // vec1 > vec0
+    return boolInVec(mask);
+}
+
+inline const boolInVec
+operator <= (floatInVec vec0, floatInVec vec1)
+{
+    const boolInVec greater = (vec0 > vec1);
+    return !greater;  // NOTE(review): negation of >, so presumably true when an operand is NaN - confirm intended
+}
+
+inline const boolInVec
+operator > (floatInVec vec0, floatInVec vec1)
+{
+    const vec_uint4 mask = (vec_uint4)vec_cmpgt(vec0.get128(), vec1.get128());  // vec0 > vec1
+    return boolInVec(mask);
+}
+
+inline const boolInVec
+operator >= (floatInVec vec0, floatInVec vec1)
+{
+    const boolInVec less = (vec0 < vec1);
+    return !less;  // NOTE(review): negation of <, so presumably true when an operand is NaN - confirm intended
+}
+
+inline const boolInVec
+operator == (floatInVec vec0, floatInVec vec1)
+{
+    const vec_uint4 mask = (vec_uint4)vec_cmpeq(vec0.get128(), vec1.get128());
+    return boolInVec(mask);
+}
+
+inline const boolInVec
+operator != (floatInVec vec0, floatInVec vec1)
+{
+    const boolInVec equal = (vec0 == vec1);
+    return !equal;  // defined purely as the negation of operator ==
+}
+    
+inline const floatInVec
+select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
+{
+    const vec_float4 picked = vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128());
+    return floatInVec(picked);  // vec_sel takes bits from vec1 where the mask bit is set, else from vec0
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_aos.h
index 11cdcddf6..77a184725 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_aos.h
@@ -1,2188 +1,2188 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_CPP_H
-#define _VECTORMATH_MAT_AOS_CPP_H
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
-#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
-#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
-#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( floatInVec scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( Quat unitQuat )
-{
-    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
-    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
-    xyzw_2 = vec_add( unitQuat.get128(), unitQuat.get128() );
-    wwww = vec_splat( unitQuat.get128(), 3 );
-    yzxw = vec_perm( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_PERM_YZXW );
-    zxyw = vec_perm( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_PERM_ZXYW );
-    yzxw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_YZXW );
-    zxyw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_ZXYW );
-    tmp0 = vec_madd( yzxw_2, wwww, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_nmsub( yzxw, yzxw_2, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    tmp2 = vec_madd( yzxw, xyzw_2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp0 = vec_madd( zxyw, xyzw_2, tmp0 );
-    tmp1 = vec_nmsub( zxyw, zxyw_2, tmp1 );
-    tmp2 = vec_nmsub( zxyw_2, wwww, tmp2 );
-    tmp3 = vec_sel( tmp0, tmp1, select_x );
-    tmp4 = vec_sel( tmp1, tmp2, select_x );
-    tmp5 = vec_sel( tmp2, tmp0, select_x );
-    mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
-    mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
-    mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
-}
-
-inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, floatInVec val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, res0, res1, res2;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
-    res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-    res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
-    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
-    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
-    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-    inv0 = vec_madd( inv0, invdet, zero );
-    inv1 = vec_madd( inv1, invdet, zero );
-    inv2 = vec_madd( inv2, invdet, zero );
-    return Matrix3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 )
-    );
-}
-
-inline const floatInVec determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Matrix3 Matrix3::operator *( floatInVec scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline Matrix3 & Matrix3::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
-{
-    return floatInVec(scalar) * mat;
-}
-
-inline const Matrix3 operator *( floatInVec scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mCol0.get128(), xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mCol1.get128(), yyyy, res );
-    res = vec_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationX( floatInVec radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationY( floatInVec radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationZ( floatInVec radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    return Matrix3(
-        Vector3( vec_madd( Z0, Y0, zero ) ),
-        Vector3( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
-        Vector3( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Matrix3 Matrix3::rotation( floatInVec radians, Vector3 unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    axis = unitVec.get128();
-    sincosf4( radians.get128(), &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    axisS = vec_madd( axis, s, zero );
-    negAxisS = negatef4( axisS );
-    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
-    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
-    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
-    return Matrix3(
-        Vector3( vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 ) ),
-        Vector3( vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 ) ),
-        Vector3( vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( Quat unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    return Matrix3(
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0xF000 ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x0F00 ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x00F0 ) )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, boolInVec select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( float scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( floatInVec scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( mat.getCol3(), 1.0f );
-}
-
-inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, floatInVec val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() );
-    tmp2 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp3 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() );
-    res0 = vec_mergeh( tmp0, tmp1 );
-    res1 = vec_mergel( tmp0, tmp1 );
-    res2 = vec_mergeh( tmp2, tmp3 );
-    res3 = vec_mergel( tmp2, tmp3 );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4( res3 )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vector float in0, in1, in2, in3;
-    vector float tmp0, tmp1, tmp2, tmp3;
-    vector float cof0, cof1, cof2, cof3;
-    vector float t0, t1, t2, t3;
-    vector float t01, t02, t03, t12, t23;
-    vector float t1r, t2r;
-    vector float t01r, t02r, t03r, t12r, t23r;
-    vector float t1r3, t1r3r;
-    vector float det, det0, det1, det2, det3, invdet;
-    vector float vzero = (vector float){0.0};
-    in0 = mat.getCol0().get128();
-    in1 = mat.getCol1().get128();
-    in2 = mat.getCol2().get128();
-    in3 = mat.getCol3().get128();
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
-    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
-    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
-    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
-    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
-    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
-    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
-    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
-    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
-    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
-    cof1 = vec_nmsub(t0, t23, vzero);		/* -(AGP ECL IOH MKD) */
-    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
-    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
-    cof1 = vec_madd(t0, t23r, cof1);		/* AOH EKD IGP MCL + cof1 */
-    cof1 = vec_sld(cof1, cof1, 8);		/* IGP MCL AOH EKD - IOH MKD AGP ECL */
-    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
-    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
-    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
-    cof3 = vec_madd(t0, t12, vzero);		/* ANG EJC IFO MBK */
-    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
-    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
-    cof3 = vec_nmsub(t0, t12r, cof3);		/* cof3 - AFO EBK ING MJC */
-    cof3 = vec_sld(cof3, cof3, 8);		/* ING MJC AFO EBK - IFO MBK ANG EJC */
-    t1r = vec_sld(t1, t1, 8);			/* B F J N */
-    t2r = vec_sld(t2, t2, 8);			/* K O C G */
-    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
-    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
-    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
-    cof2 = vec_madd(t0, t1r3, vzero);		/* AFP EBL INH MJD */
-    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
-    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
-    cof2 = vec_nmsub(t0, t1r3r, cof2);		/* cof2 - ANH EJD IFP MBL */
-    cof2 = vec_sld(cof2, cof2, 8);		/* IFP MBL ANH EJD - INH MJD AFP EBL */
-    t01 = vec_madd(t0, t1, vzero);		/* AJ EN IB MF */
-    t01 = vec_perm(t01, t01, _VECTORMATH_PERM_YXWZ);	/* EN AJ MF IB */
-    cof2 = vec_nmsub(t3, t01, cof2);		/* cof2 - LEN PAJ DMF HIB */
-    cof3 = vec_madd(t2r, t01, cof3);		/* KEN OAJ CMF GIB + cof3 */ 
-    t01r = vec_sld(t01, t01, 8);			/* MF IB EN AJ */
-    cof2 = vec_madd(t3, t01r, cof2);		/* LMF PIB DEN HAJ + cof2 */
-    cof3 = vec_nmsub(t2r, t01r, cof3);		/* cof3 - KMF OIB CEN GAJ */
-    t03 = vec_madd(t0, t3, vzero);		/* AL EP ID MH */
-    t03 = vec_perm(t03, t03, _VECTORMATH_PERM_YXWZ);	/* EP AL MH ID */
-    cof1 = vec_nmsub(t2r, t03, cof1);		/* cof1 - KEP OAL CMH GID */
-    cof2 = vec_madd(t1, t03, cof2);		/* JEP NAL BMH FID + cof2 */
-    t03r = vec_sld(t03, t03, 8);			/* MH ID EP AL */
-    cof1 = vec_madd(t2r, t03r, cof1);		/* KMH OID CEP GAL + cof1 */
-    cof2 = vec_nmsub(t1, t03r, cof2);		/* cof2 - JMH NID BEP FAL */ 
-    t02 = vec_madd(t0, t2r, vzero);		/* AK EO IC MG */
-    t02 = vec_perm(t02, t02, _VECTORMATH_PERM_YXWZ);	/* E0 AK MG IC */
-    cof1 = vec_madd(t3, t02, cof1);		/* LEO PAK DMG HIC + cof1 */
-    cof3 = vec_nmsub(t1, t02, cof3);		/* cof3 - JEO NAK BMG FIC */
-    t02r = vec_sld(t02, t02, 8);			/* MG IC EO AK */
-    cof1 = vec_nmsub(t3, t02r, cof1);		/* cof1 - LMG PIC DEO HAK */
-    cof3 = vec_madd(t1, t02r, cof3);		/* JMG NIC BEO FAK + cof3 */
-    /* Compute the determinant of the matrix 
-     *
-     * det = sum_across(t0 * cof0);
-     *
-     * We perform a sum across the entire vector so that 
-     * we don't have to splat the result when multiplying the
-     * cofactors by the inverse of the determinant.
-     */
-    det  = vec_madd(t0, cof0, vzero);
-    det0 = vec_splat(det, 0);
-    det1 = vec_splat(det, 1);
-    det2 = vec_splat(det, 2);
-    det3 = vec_splat(det, 3);
-    det  = vec_add(det0, det1);
-    det2 = vec_add(det2, det3);
-    det  = vec_add(det, det2);
-    /* Compute the reciprocal of the determinant.
-     */
-    invdet = recipf4(det);
-    /* Multiply the cofactors by the reciprocal of the determinant.
-     */ 
-    return Matrix4(
-        Vector4( vec_madd(cof0, invdet, vzero) ),
-        Vector4( vec_madd(cof1, invdet, vzero) ),
-        Vector4( vec_madd(cof2, invdet, vzero) ),
-        Vector4( vec_madd(cof3, invdet, vzero) )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline const floatInVec determinant( const Matrix4 & mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vector float in0, in1, in2, in3;
-    vector float tmp0, tmp1, tmp2, tmp3;
-    vector float cof0;
-    vector float t0, t1, t2, t3;
-    vector float t12, t23;
-    vector float t1r, t2r;
-    vector float t12r, t23r;
-    vector float t1r3, t1r3r;
-    vector float vzero = (vector float){0.0};
-    in0 = mat.getCol0().get128();
-    in1 = mat.getCol1().get128();
-    in2 = mat.getCol2().get128();
-    in3 = mat.getCol3().get128();
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
-    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
-    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
-    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
-    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
-    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
-    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
-    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
-    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
-    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
-    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
-    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
-    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
-    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
-    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
-    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
-    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
-    t1r = vec_sld(t1, t1, 8);			/* B F J N */
-    t2r = vec_sld(t2, t2, 8);			/* K O C G */
-    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
-    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
-    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
-    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
-    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
-    return floatInVec( _vmathVfDot4(t0,cof0), 0 );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Matrix4 Matrix4::operator *( floatInVec scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline Matrix4 & Matrix4::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
-{
-    return floatInVec(scalar) * mat;
-}
-
-inline const Matrix4 operator *( floatInVec scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( Vector4 vec ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz, wwww;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    wwww = vec_splat( vec.get128(), 3 );
-    tmp0 = vec_madd( mCol0.get128(), xxxx, zero );
-    tmp1 = vec_madd( mCol1.get128(), yyyy, zero );
-    tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = vec_madd( mCol3.get128(), wwww, tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    return Vector4( res );
-}
-
-inline const Vector4 Matrix4::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mCol0.get128(), xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mCol1.get128(), yyyy, res );
-    res = vec_madd( mCol2.get128(), zzzz, res );
-    return Vector4( res );
-}
-
-inline const Vector4 Matrix4::operator *( Point3 pnt ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( pnt.get128(), 0 );
-    yyyy = vec_splat( pnt.get128(), 1 );
-    zzzz = vec_splat( pnt.get128(), 2 );
-    tmp0 = vec_madd( mCol0.get128(), xxxx, zero );
-    tmp1 = vec_madd( mCol1.get128(), yyyy, zero );
-    tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = vec_add( mCol3.get128(), tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    return Vector4( res );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationX( floatInVec radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationY( floatInVec radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4::yAxis( ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationZ( floatInVec radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    return Matrix4(
-        Vector4( vec_madd( Z0, Y0, zero ) ),
-        Vector4( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
-        Vector4( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Matrix4 Matrix4::rotation( floatInVec radians, Vector3 unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    axis = unitVec.get128();
-    sincosf4( radians.get128(), &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    axisS = vec_madd( axis, s, zero );
-    negAxisS = negatef4( axisS );
-    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
-    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
-    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
-    zeroW = (vec_float4)_VECTORMATH_MASK_0x000F;
-    axis = vec_andc( axis, zeroW );
-    tmp0 = vec_andc( tmp0, zeroW );
-    tmp1 = vec_andc( tmp1, zeroW );
-    tmp2 = vec_andc( tmp2, zeroW );
-    return Matrix4(
-        Vector4( vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 ) ),
-        Vector4( vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 ) ),
-        Vector4( vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( Quat unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    return Matrix4(
-        Vector4( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0xF000 ) ),
-        Vector4( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x0F00 ) ),
-        Vector4( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x00F0 ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, 1.0f );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( Vector3 translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, 1.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    vec_float4 zero, col0, col1, col2, col3;
-    union { vec_float4 v; float s[4]; } tmp;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp.v = zero;
-    tmp.s[0] = f / aspect;
-    col0 = tmp.v;
-    tmp.v = zero;
-    tmp.s[1] = f;
-    col1 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = ( zNear + zFar ) * rangeInv;
-    tmp.s[3] = -1.0f;
-    col2 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
-    col3 = tmp.v;
-    return Matrix4(
-        Vector4( col0 ),
-        Vector4( col1 ),
-        Vector4( col2 ),
-        Vector4( col3 )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff;
-    vec_float4 diagonal, column, near2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = vec_splat( n.v, 0 );
-    near2 = vec_add( near2, near2 );
-    diagonal = vec_madd( near2, inv_diff, zero );
-    column = vec_madd( sum, inv_diff, zero );
-    return Matrix4(
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 ) ),
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 ) ),
-        Vector4( vec_sel( column, ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}), _VECTORMATH_MASK_0x000F ) ),
-        Vector4( vec_sel( zero, vec_madd( diagonal, vec_splat( f.v, 0 ), zero ), _VECTORMATH_MASK_0x00F0 ) )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff, neg_inv_diff;
-    vec_float4 diagonal, column;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = vec_add( inv_diff, inv_diff );
-    column = vec_madd( sum, vec_sel( neg_inv_diff, inv_diff, _VECTORMATH_MASK_0x00F0 ), zero );
-    return Matrix4(
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 ) ),
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 ) ),
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x00F0 ) ),
-        Vector4( vec_sel( column, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), _VECTORMATH_MASK_0x000F ) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, boolInVec select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-    print( mat.getRow( 3 ) );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( floatInVec scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3 & Transform3::setCol0( Vector3 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( Vector3 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( Vector3 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( Vector3 _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, Vector3 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, Vector4 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, floatInVec val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
-    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
-    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    xxxx = vec_splat( inv3, 0 );
-    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_madd( inv0, xxxx, zero );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    inv0 = vec_madd( inv0, invdet, zero );
-    inv1 = vec_madd( inv1, invdet, zero );
-    inv2 = vec_madd( inv2, invdet, zero );
-    inv3 = vec_madd( inv3, invdet, zero );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1;
-    vec_float4 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
-    tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
-    xxxx = vec_splat( inv3, 0 );
-    inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_madd( inv0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mCol0.get128(), xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mCol1.get128(), yyyy, res );
-    res = vec_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Point3 Transform3::operator *( Point3 pnt ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( pnt.get128(), 0 );
-    yyyy = vec_splat( pnt.get128(), 1 );
-    zzzz = vec_splat( pnt.get128(), 2 );
-    tmp0 = vec_madd( mCol0.get128(), xxxx, zero );
-    tmp1 = vec_madd( mCol1.get128(), yyyy, zero );
-    tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = vec_add( mCol3.get128(), tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    return Point3( res );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationX( floatInVec radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationY( floatInVec radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationZ( floatInVec radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    return Transform3(
-        Vector3( vec_madd( Z0, Y0, zero ) ),
-        Vector3( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
-        Vector3( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Transform3 Transform3::rotation( floatInVec radians, Vector3 unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::rotation( Quat unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    return Transform3(
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0xF000 ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x0F00 ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x00F0 ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( Vector3 translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, boolInVec select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    print( tfrm.getRow( 0 ) );
-    print( tfrm.getRow( 1 ) );
-    print( tfrm.getRow( 2 ) );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    vec_float4 res;
-    vec_float4 col0, col1, col2;
-    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
-    vec_float4 radicand, invSqrt, scale;
-    vec_float4 res0, res1, res2, res3;
-    vec_float4 xx, yy, zz;
-    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
-    vec_uint4 select_y = _VECTORMATH_MASK_0x0F00;
-    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
-    vec_uint4 select_w = _VECTORMATH_MASK_0x000F;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-
-    col0 = tfrm.getCol0().get128();
-    col1 = tfrm.getCol1().get128();
-    col2 = tfrm.getCol2().get128();
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = vec_sel( col0, col1, select_y );
-    xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
-    yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
-    zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
-
-    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = vec_sel( col0, col1, select_z );
-    zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
-    yz_zx_xy = vec_sel( col0, col1, select_x );
-    yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
-
-    sum = vec_add( zy_xz_yx, yz_zx_xy );
-    diff = vec_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = vec_madd( invSqrt, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero );
-    res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
-    res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
-    res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
-    res3 = diff;
-    res0 = vec_sel( res0, radicand, select_x );
-    res1 = vec_sel( res1, radicand, select_y );
-    res2 = vec_sel( res2, radicand, select_z );
-    res3 = vec_sel( res3, radicand, select_w );
-    res0 = vec_madd( res0, vec_splat( scale, 0 ), zero );
-    res1 = vec_madd( res1, vec_splat( scale, 1 ), zero );
-    res2 = vec_madd( res2, vec_splat( scale, 2 ), zero );
-    res3 = vec_madd( res3, vec_splat( scale, 3 ), zero );
-
-    /* determine case and select answer */
-
-    xx = vec_splat( col0, 0 );
-    yy = vec_splat( col1, 1 );
-    zz = vec_splat( col2, 2 );
-    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
-    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
-    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), zero ) );
-    mVec128 = res;
-}
-
-inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    xxxx = vec_splat( vec.get128(), 0 );
-    mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );
-    mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-    mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( vec.get128(), 1 );
-    res = vec_madd( mcol0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mcol1, yyyy, res );
-    res = vec_madd( mcol2, zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 crossMatrix( Vector3 vec )
-{
-    vec_float4 neg, res0, res1, res2;
-    neg = negatef4( vec.get128() );
-    res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
-    res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
-    res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
-    res0 = vec_andc( res0, (vec_float4)_VECTORMATH_MASK_0xF000 );
-    res1 = vec_andc( res1, (vec_float4)_VECTORMATH_MASK_0x0F00 );
-    res2 = vec_andc( res2, (vec_float4)_VECTORMATH_MASK_0x00F0 );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })  // each mask packs four word selectors (defined outside this section) into a vec_perm control vector
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f  // pi/2 as a single-precision literal
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )  // copy constructor: duplicates mat one column at a time
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( float scalar )  // sets all three columns to Vector3( scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( floatInVec scalar )  // floatInVec counterpart of the float constructor: all three columns become Vector3( scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( Quat unitQuat )  // builds the three columns from a quaternion; unit length is implied by the parameter name and is not checked here
+{
+    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;  // lane masks are defined outside this section; names suggest the x and z words
+    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
+    xyzw_2 = vec_add( unitQuat.get128(), unitQuat.get128() );  // 2*q in every lane
+    wwww = vec_splat( unitQuat.get128(), 3 );  // element 3 replicated across the vector
+    yzxw = vec_perm( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_PERM_YZXW );  // reordered copies of q; the YZXW/ZXYW masks are defined outside this section
+    zxyw = vec_perm( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_PERM_ZXYW );
+    yzxw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_YZXW );  // the same reorderings applied to 2*q
+    zxyw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_ZXYW );
+    tmp0 = vec_madd( yzxw_2, wwww, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // tmp0 = yzxw_2 * wwww
+    tmp1 = vec_nmsub( yzxw, yzxw_2, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );  // tmp1 = 1 - yzxw * yzxw_2
+    tmp2 = vec_madd( yzxw, xyzw_2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // tmp2 = yzxw * xyzw_2
+    tmp0 = vec_madd( zxyw, xyzw_2, tmp0 );  // tmp0 += zxyw * xyzw_2
+    tmp1 = vec_nmsub( zxyw, zxyw_2, tmp1 );  // tmp1 -= zxyw * zxyw_2
+    tmp2 = vec_nmsub( zxyw_2, wwww, tmp2 );  // tmp2 -= zxyw_2 * wwww
+    tmp3 = vec_sel( tmp0, tmp1, select_x );  // each column mixes lanes from tmp0/tmp1/tmp2: select_x lane first, select_z lane below
+    tmp4 = vec_sel( tmp1, tmp2, select_x );
+    tmp5 = vec_sel( tmp2, tmp0, select_x );
+    mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
+    mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
+    mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
+}
+
+inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )  // constructs the matrix directly from its three column vectors
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+}
+
+inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )  // replaces column 0
+{
+    mCol0 = _col0;
+    return *this;  // returns *this so setters can be chained
+}
+
+inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )  // replaces column 1
+{
+    mCol1 = _col1;
+    return *this;  // returns *this so setters can be chained
+}
+
+inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )  // replaces column 2
+{
+    mCol2 = _col2;
+    return *this;  // returns *this so setters can be chained
+}
+
+inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )  // replaces the column at index col; col is not range-checked
+{
+    *(&mCol0 + col) = vec;  // indexes from mCol0 by pointer arithmetic; assumes mCol0..mCol2 are laid out contiguously (members declared outside this section)
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )  // overwrites one row: element `row` of column i receives element i of vec
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )  // writes a single element addressed by column, then row
+{
+    (*this)[col].setElem(row, val);  // non-const operator[] yields a reference, so the column is modified in place
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, floatInVec val )  // floatInVec overload: writes a single element addressed by column, then row
+{
+    Vector3 tmpV3_0;
+    tmpV3_0 = this->getCol( col );  // getCol returns a copy, so edit the copy...
+    tmpV3_0.setElem( row, val );
+    this->setCol( col, tmpV3_0 );  // ...and store it back into the matrix
+    return *this;
+}
+
+inline const floatInVec Matrix3::getElem( int col, int row ) const  // reads a single element addressed by column, then row
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const  // returns a copy of column 0
+{
+    return mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const  // returns a copy of column 1
+{
+    return mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const  // returns a copy of column 2
+{
+    return mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const  // returns a copy of the column at index col; col is not range-checked
+{
+    return *(&mCol0 + col);  // pointer arithmetic from mCol0; assumes the column members are contiguous (declared outside this section)
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const  // gathers element `row` from each of the three columns into a new Vector3
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col )  // mutable access: returns a reference to the column at index col (not range-checked)
+{
+    return *(&mCol0 + col);  // pointer arithmetic from mCol0; assumes the column members are contiguous (declared outside this section)
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const  // read-only access: returns the column at index col by value (not range-checked)
+{
+    return *(&mCol0 + col);  // pointer arithmetic from mCol0; assumes the column members are contiguous (declared outside this section)
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )  // copy assignment: copies the three columns of mat
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this;  // returns *this to allow chained assignment
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )  // returns the transpose of mat, built with merge/permute shuffles
+{
+    vec_float4 tmp0, tmp1, res0, res1, res2;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );  // interleaves the first two words of col0 and col2
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );  // interleaves the last two words of col0 and col2
+    res0 = vec_mergeh( tmp0, mat.getCol1().get128() );  // weaves col1 in: first three words become element 0 of col0, col1, col2
+    res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );  // presumably row 1 per the mask name; fourth word is filler
+    res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );  // presumably row 2 per the mask name; fourth word is filler
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat )  // inverse from column cross products scaled by the reciprocal of a triple product; a zero determinant is not guarded against
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );  // col0 x col1 (helper defined outside this section)
+    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );  // col1 x col2
+    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );  // col2 x col0
+    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );  // (col0 x col1) . col2, the same quantity determinant() computes
+    dot = vec_splat( dot, 0 );  // broadcast element 0 to every lane
+    invdet = recipf4( dot );  // presumably a per-lane reciprocal (recipf4 is defined elsewhere)
+    tmp3 = vec_mergeh( tmp0, tmp2 );  // same merge/permute sequence as transpose(), applied to tmp0, tmp1, tmp2
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+    inv0 = vec_madd( inv0, invdet, zero );  // scale each result column by invdet
+    inv1 = vec_madd( inv1, invdet, zero );
+    inv2 = vec_madd( inv2, invdet, zero );
+    return Matrix3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 )
+    );
+}
+
+inline const floatInVec determinant( const Matrix3 & mat )  // determinant as the scalar triple product col2 . (col0 x col1)
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const  // column-wise sum of this matrix and mat
+{
+    return Matrix3(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 )
+    );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const  // column-wise difference: this matrix minus mat
+{
+    return Matrix3(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )  // in-place addition, implemented via operator + and assignment
+{
+    *this = *this + mat;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )  // in-place subtraction, implemented via binary operator - and assignment
+{
+    *this = *this - mat;
+    return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const  // unary minus: negates each column
+{
+    return Matrix3(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 )
+    );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )  // applies the Vector3 absPerElem to each column of mat
+{
+    return Matrix3(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( float scalar ) const  // matrix * scalar; wraps the float and forwards to the floatInVec overload
+{
+    return *this * floatInVec(scalar);
+}
+
+inline const Matrix3 Matrix3::operator *( floatInVec scalar ) const  // matrix * scalar: multiplies each column by scalar
+{
+    return Matrix3(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( float scalar )  // in-place scalar multiply; wraps the float and forwards to the floatInVec overload
+{
+    return *this *= floatInVec(scalar);
+}
+
+inline Matrix3 & Matrix3::operator *=( floatInVec scalar )  // in-place scalar multiply, implemented via operator * and assignment
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )  // scalar * matrix; wraps the float and forwards to the floatInVec free operator
+{
+    return floatInVec(scalar) * mat;
+}
+
+inline const Matrix3 operator *( floatInVec scalar, const Matrix3 & mat )  // scalar * matrix; delegates to the member operator mat * scalar
+{
+    return mat * scalar;
+}
+
+inline const Vector3 Matrix3::operator *( Vector3 vec ) const  // matrix * vector: sum of the columns weighted by elements 0, 1, 2 of vec
+{
+    vec_float4 res;
+    vec_float4 xxxx, yyyy, zzzz;
+    xxxx = vec_splat( vec.get128(), 0 );  // broadcast each of the first three elements of vec across a full vector
+    yyyy = vec_splat( vec.get128(), 1 );
+    zzzz = vec_splat( vec.get128(), 2 );
+    res = vec_madd( mCol0.get128(), xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // res = col0 * x
+    res = vec_madd( mCol1.get128(), yyyy, res );  // res += col1 * y
+    res = vec_madd( mCol2.get128(), zzzz, res );  // res += col2 * z
+    return Vector3( res );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const  // matrix product: each result column is this matrix times the matching column of mat
+{
+    return Matrix3(
+        ( *this * mat.mCol0 ),
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )  // in-place product with mat on the right: *this = *this * mat
+{
+    *this = *this * mat;
+    return *this;
+}
+
+// Apply the Vector3 mulPerElem to each pair of matching columns of mat0 and mat1.
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    const Vector3 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// Build a matrix whose columns are Vector3::xAxis(), yAxis() and zAxis().
+inline const Matrix3 Matrix3::identity( )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Matrix3( axisX, axisY, axisZ );
+}
+
+// Rotation about X from a plain float angle; forwards to the floatInVec overload.
+inline const Matrix3 Matrix3::rotationX( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationX( angle );
+}
+
+// Rotation about X: column 0 is the X axis; columns 1 and 2 are built by
+// selecting cos / sin / -sin into the y and z lanes of a zero vector.
+inline const Matrix3 Matrix3::rotationX( floatInVec radians )
+{
+    vec_float4 sinv, cosv;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 yLane = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 zLane = _VECTORMATH_MASK_0x00F0;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    // col1: cos in y, sin in z
+    vec_float4 col1 = vec_sel( vec_sel( zeros, cosv, yLane ), sinv, zLane );
+    // col2: -sin in y, cos in z
+    vec_float4 col2 = vec_sel( vec_sel( zeros, negatef4(sinv), yLane ), cosv, zLane );
+    return Matrix3( Vector3::xAxis( ), Vector3( col1 ), Vector3( col2 ) );
+}
+
+// Rotation about Y from a plain float angle; forwards to the floatInVec overload.
+inline const Matrix3 Matrix3::rotationY( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationY( angle );
+}
+
+// Rotation about Y: column 1 is the Y axis; columns 0 and 2 are built by
+// selecting cos / sin / -sin into the x and z lanes of a zero vector.
+inline const Matrix3 Matrix3::rotationY( floatInVec radians )
+{
+    vec_float4 sinv, cosv;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 xLane = _VECTORMATH_MASK_0xF000;
+    vec_uint4 zLane = _VECTORMATH_MASK_0x00F0;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    // col0: cos in x, -sin in z
+    vec_float4 col0 = vec_sel( vec_sel( zeros, cosv, xLane ), negatef4(sinv), zLane );
+    // col2: sin in x, cos in z
+    vec_float4 col2 = vec_sel( vec_sel( zeros, sinv, xLane ), cosv, zLane );
+    return Matrix3( Vector3( col0 ), Vector3::yAxis( ), Vector3( col2 ) );
+}
+
+// Rotation about Z from a plain float angle; forwards to the floatInVec overload.
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationZ( angle );
+}
+
+// Rotation about Z: column 2 is the Z axis; columns 0 and 1 are built by
+// selecting cos / sin / -sin into the x and y lanes of a zero vector.
+inline const Matrix3 Matrix3::rotationZ( floatInVec radians )
+{
+    vec_float4 sinv, cosv;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 xLane = _VECTORMATH_MASK_0xF000;
+    vec_uint4 yLane = _VECTORMATH_MASK_0x0F00;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    // col0: cos in x, sin in y
+    vec_float4 col0 = vec_sel( vec_sel( zeros, cosv, xLane ), sinv, yLane );
+    // col1: -sin in x, cos in y
+    vec_float4 col1 = vec_sel( vec_sel( zeros, negatef4(sinv), xLane ), cosv, yLane );
+    return Matrix3( Vector3( col0 ), Vector3( col1 ), Vector3::zAxis( ) );
+}
+
+// Build a rotation matrix from three angles packed in radiansXYZ.
+// The sines and cosines of all angles are computed in one sincosf4 call and
+// then shuffled into per-axis factor vectors (X*, Y*, Z*) that are multiplied
+// together to form the three columns.
+// NOTE(review): the function name suggests the composition order Z * Y * X —
+// confirm against the library documentation.
+inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    // Pad the angles with a fourth component of 0.0f before taking sin/cos.
+    angles = Vector4( radiansXYZ, 0.0f ).get128();
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    // Interleave cos/sin (and -sin/cos) of the last two lanes.
+    // NOTE(review): presumably the z-angle and padding lanes — confirm vec_mergel lane order.
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+    // Clear the lane covered by the 0x000F mask in Z1.
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
+    // NOTE(review): the BBYX permute name suggests (b.y, b.y, a.y, a.x) — confirm against the macro definition.
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    // Broadcast lane 0 of sin and cos.
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    // Shared partial product reused by columns 1 and 2.
+    tmp = vec_madd( Z0, Y1, zero );
+    return Matrix3(
+        Vector3( vec_madd( Z0, Y0, zero ) ),
+        Vector3( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) )
+    );
+}
+
+// Axis-angle rotation from a plain float angle; forwards to the floatInVec overload.
+inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec )
+{
+    const floatInVec angle( radians );
+    return rotation( angle, unitVec );
+}
+
+// Build a rotation matrix from an angle and an axis.
+// Each column i is computed as (axis * axis[i]) * (1 - cos) + tmp_i, where
+// tmp_i holds cos on lane i and +/- (axis * sin) components on the other lanes.
+// NOTE(review): the parameter name implies unitVec must be normalized; the
+// code does not normalize it — confirm with callers.
+inline const Matrix3 Matrix3::rotation( floatInVec radians, Vector3 unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c );
+    // Broadcast each axis component across a full vector.
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    // axis * sin and its negation feed the off-diagonal terms.
+    axisS = vec_madd( axis, s, zero );
+    negAxisS = negatef4( axisS );
+    // Gather the signed axis*sin terms for each column.
+    // NOTE(review): lane meaning is taken from the permute macro names — confirm against their definitions.
+    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+    // Overwrite one lane per column with cos, selected by the 0xF000 / 0x0F00 / 0x00F0 masks.
+    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
+    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
+    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
+    return Matrix3(
+        Vector3( vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 ) ),
+        Vector3( vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 ) ),
+        Vector3( vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 ) )
+    );
+}
+
+// Rotation matrix from a quaternion; delegates to the Matrix3( Quat ) constructor.
+inline const Matrix3 Matrix3::rotation( Quat unitQuat )
+{
+    const Matrix3 fromQuat( unitQuat );
+    return fromQuat;
+}
+
+// Scale matrix: each column keeps exactly one lane of scaleVec (x, y, z in turn)
+// and is zero elsewhere.
+inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 factors = scaleVec.get128();
+    vec_float4 diagX = vec_sel( zeros, factors, _VECTORMATH_MASK_0xF000 );
+    vec_float4 diagY = vec_sel( zeros, factors, _VECTORMATH_MASK_0x0F00 );
+    vec_float4 diagZ = vec_sel( zeros, factors, _VECTORMATH_MASK_0x00F0 );
+    return Matrix3( Vector3( diagX ), Vector3( diagY ), Vector3( diagZ ) );
+}
+
+// Scale column 0 by scaleVec.x, column 1 by scaleVec.y and column 2 by scaleVec.z.
+inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
+{
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Multiply every column of mat element-wise by scaleVec.
+inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
+{
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Column-wise select between mat0 and mat1, controlled by a bool;
+// each column goes through the Vector3 select with the same flag.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+// Column-wise select between mat0 and mat1, controlled by a boolInVec;
+// each column goes through the Vector3 select with the same flag.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, boolInVec select1 )
+{
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: print the matrix one row at a time (rows 0, 1, 2).
+inline void print( const Matrix3 & mat )
+{
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Debug helper: print "<name>:" on its own line, then the matrix rows.
+inline void print( const Matrix3 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+#endif
+
+// Copy constructor: copies the four columns of mat.
+inline Matrix4::Matrix4( const Matrix4 & mat )
+{
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+}
+
+// Set every column to Vector4( scalar ).
+inline Matrix4::Matrix4( float scalar )
+{
+    const Vector4 filled( scalar );
+    mCol0 = filled;
+    mCol1 = filled;
+    mCol2 = filled;
+    mCol3 = filled;
+}
+
+// Set every column to Vector4( scalar ).
+inline Matrix4::Matrix4( floatInVec scalar )
+{
+    const Vector4 filled( scalar );
+    mCol0 = filled;
+    mCol1 = filled;
+    mCol2 = filled;
+    mCol3 = filled;
+}
+
+// Promote a Transform3 to 4x4: columns 0-2 get w = 0, column 3 gets w = 1.
+inline Matrix4::Matrix4( const Transform3 & mat )
+{
+    const float wAxisCols = 0.0f;
+    const float wLastCol = 1.0f;
+    mCol0 = Vector4( mat.getCol0(), wAxisCols );
+    mCol1 = Vector4( mat.getCol1(), wAxisCols );
+    mCol2 = Vector4( mat.getCol2(), wAxisCols );
+    mCol3 = Vector4( mat.getCol3(), wLastCol );
+}
+
+// Construct from four explicit columns.
+inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
+{
+    this->mCol0 = _col0;
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+    this->mCol3 = _col3;
+}
+
+// Construct from a 3x3 block and a translation: the 3x3 columns get w = 0,
+// the translation column gets w = 1.
+inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
+{
+    const float wAxisCols = 0.0f;
+    const float wLastCol = 1.0f;
+    mCol0 = Vector4( mat.getCol0(), wAxisCols );
+    mCol1 = Vector4( mat.getCol1(), wAxisCols );
+    mCol2 = Vector4( mat.getCol2(), wAxisCols );
+    mCol3 = Vector4( translateVec, wLastCol );
+}
+
+// Construct from a quaternion and a translation: the quaternion is converted
+// to a Matrix3 whose columns get w = 0; the translation column gets w = 1.
+inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
+{
+    const Matrix3 rot = Matrix3( unitQuat );
+    mCol0 = Vector4( rot.getCol0(), 0.0f );
+    mCol1 = Vector4( rot.getCol1(), 0.0f );
+    mCol2 = Vector4( rot.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+// Replace column 0 and return this matrix.
+inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replace column 1 and return this matrix.
+inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replace column 2 and return this matrix.
+inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replace column 3 and return this matrix.
+inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
+{
+    this->mCol3 = _col3;
+    return *this;
+}
+
+// Replace the column at index col, addressed relative to mCol0.
+// No bounds check is performed on col.
+inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
+{
+    (&mCol0)[col] = vec;
+    return *this;
+}
+
+// Replace one row: element i of vec is written into element `row` of column i.
+inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
+{
+    this->mCol0.setElem( row, vec.getElem( 0 ) );
+    this->mCol1.setElem( row, vec.getElem( 1 ) );
+    this->mCol2.setElem( row, vec.getElem( 2 ) );
+    this->mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+// Write a single element (float value) through a reference to the addressed column.
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    Vector4 & column = (*this)[col];
+    column.setElem(row, val);
+    return *this;
+}
+
+// Write a single element (floatInVec value): copy the column out, modify the
+// copy, then store it back.
+inline Matrix4 & Matrix4::setElem( int col, int row, floatInVec val )
+{
+    Vector4 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+
+// Read the element at (col, row) by fetching the column and then its element.
+inline const floatInVec Matrix4::getElem( int col, int row ) const
+{
+    const Vector4 column = getCol( col );
+    return column.getElem( row );
+}
+
+// Return a copy of column 0.
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Return a copy of column 1.
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Return a copy of column 2.
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Return a copy of column 3.
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Return a copy of the column at index col, addressed relative to mCol0.
+// No bounds check is performed on col.
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    return (&mCol0)[col];
+}
+
+// Assemble a row by taking element `row` from each of the four columns.
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4(
+        this->mCol0.getElem( row ),
+        this->mCol1.getElem( row ),
+        this->mCol2.getElem( row ),
+        this->mCol3.getElem( row )
+    );
+}
+
+// Mutable column access, addressed relative to mCol0. No bounds check on col.
+inline Vector4 & Matrix4::operator []( int col )
+{
+    return (&mCol0)[col];
+}
+
+// Read-only column access (returns a copy), addressed relative to mCol0.
+// No bounds check on col.
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    return (&mCol0)[col];
+}
+
+// Copy assignment: copies the four columns of mat and returns this matrix.
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+    return *this;
+}
+
+// 4x4 transpose done as two rounds of high/low merges on the column vectors.
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    vec_float4 col0 = mat.getCol0().get128();
+    vec_float4 col1 = mat.getCol1().get128();
+    vec_float4 col2 = mat.getCol2().get128();
+    vec_float4 col3 = mat.getCol3().get128();
+    // Round 1: interleave columns 0/2 and 1/3.
+    vec_float4 hi02 = vec_mergeh( col0, col2 );
+    vec_float4 hi13 = vec_mergeh( col1, col3 );
+    vec_float4 lo02 = vec_mergel( col0, col2 );
+    vec_float4 lo13 = vec_mergel( col1, col3 );
+    // Round 2: interleave the intermediates to produce the transposed columns.
+    return Matrix4(
+        Vector4( vec_mergeh( hi02, hi13 ) ),
+        Vector4( vec_mergel( hi02, hi13 ) ),
+        Vector4( vec_mergeh( lo02, lo13 ) ),
+        Vector4( vec_mergel( lo02, lo13 ) )
+    );
+}
+
+// General 4x4 inverse: builds the four cofactor vectors (cof0..cof3) from a
+// permuted copy of the input, sums t0 * cof0 across lanes to get the
+// determinant, and scales the cofactors by its reciprocal.
+// The determinant is not tested against zero before recipf4 is applied.
+inline const Matrix4 inverse( const Matrix4 & mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vector float in0, in1, in2, in3;
+    vector float tmp0, tmp1, tmp2, tmp3;
+    vector float cof0, cof1, cof2, cof3;
+    vector float t0, t1, t2, t3;
+    vector float t01, t02, t03, t12, t23;
+    vector float t1r, t2r;
+    vector float t01r, t02r, t03r, t12r, t23r;
+    vector float t1r3, t1r3r;
+    vector float det, det0, det1, det2, det3, invdet;
+    /* Zero addend used wherever vec_madd / vec_nmsub acts as a plain multiply. */
+    vector float vzero = (vector float){0.0};
+    in0 = mat.getCol0().get128();
+    in1 = mat.getCol1().get128();
+    in2 = mat.getCol2().get128();
+    in3 = mat.getCol3().get128();
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
+    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
+    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
+    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
+    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
+    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
+    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
+    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
+    /* Generate a cofactor matrix. The computed cofactors reside in
+     * cof0, cof1, cof2, cof3.
+     */
+    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
+    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
+    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
+    cof1 = vec_nmsub(t0, t23, vzero);		/* -(AGP ECL IOH MKD) */
+    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
+    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
+    cof1 = vec_madd(t0, t23r, cof1);		/* AOH EKD IGP MCL + cof1 */
+    cof1 = vec_sld(cof1, cof1, 8);		/* IGP MCL AOH EKD - IOH MKD AGP ECL */
+    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
+    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
+    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
+    cof3 = vec_madd(t0, t12, vzero);		/* ANG EJC IFO MBK */
+    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
+    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
+    cof3 = vec_nmsub(t0, t12r, cof3);		/* cof3 - AFO EBK ING MJC */
+    cof3 = vec_sld(cof3, cof3, 8);		/* ING MJC AFO EBK - IFO MBK ANG EJC */
+    t1r = vec_sld(t1, t1, 8);			/* B F J N */
+    t2r = vec_sld(t2, t2, 8);			/* K O C G */
+    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
+    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
+    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
+    cof2 = vec_madd(t0, t1r3, vzero);		/* AFP EBL INH MJD */
+    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
+    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
+    cof2 = vec_nmsub(t0, t1r3r, cof2);		/* cof2 - ANH EJD IFP MBL */
+    cof2 = vec_sld(cof2, cof2, 8);		/* IFP MBL ANH EJD - INH MJD AFP EBL */
+    t01 = vec_madd(t0, t1, vzero);		/* AJ EN IB MF */
+    t01 = vec_perm(t01, t01, _VECTORMATH_PERM_YXWZ);	/* EN AJ MF IB */
+    cof2 = vec_nmsub(t3, t01, cof2);		/* cof2 - LEN PAJ DMF HIB */
+    cof3 = vec_madd(t2r, t01, cof3);		/* KEN OAJ CMF GIB + cof3 */ 
+    t01r = vec_sld(t01, t01, 8);			/* MF IB EN AJ */
+    cof2 = vec_madd(t3, t01r, cof2);		/* LMF PIB DEN HAJ + cof2 */
+    cof3 = vec_nmsub(t2r, t01r, cof3);		/* cof3 - KMF OIB CEN GAJ */
+    t03 = vec_madd(t0, t3, vzero);		/* AL EP ID MH */
+    t03 = vec_perm(t03, t03, _VECTORMATH_PERM_YXWZ);	/* EP AL MH ID */
+    cof1 = vec_nmsub(t2r, t03, cof1);		/* cof1 - KEP OAL CMH GID */
+    cof2 = vec_madd(t1, t03, cof2);		/* JEP NAL BMH FID + cof2 */
+    t03r = vec_sld(t03, t03, 8);			/* MH ID EP AL */
+    cof1 = vec_madd(t2r, t03r, cof1);		/* KMH OID CEP GAL + cof1 */
+    cof2 = vec_nmsub(t1, t03r, cof2);		/* cof2 - JMH NID BEP FAL */ 
+    t02 = vec_madd(t0, t2r, vzero);		/* AK EO IC MG */
+    t02 = vec_perm(t02, t02, _VECTORMATH_PERM_YXWZ);	/* EO AK MG IC */
+    cof1 = vec_madd(t3, t02, cof1);		/* LEO PAK DMG HIC + cof1 */
+    cof3 = vec_nmsub(t1, t02, cof3);		/* cof3 - JEO NAK BMG FIC */
+    t02r = vec_sld(t02, t02, 8);			/* MG IC EO AK */
+    cof1 = vec_nmsub(t3, t02r, cof1);		/* cof1 - LMG PIC DEO HAK */
+    cof3 = vec_madd(t1, t02r, cof3);		/* JMG NIC BEO FAK + cof3 */
+    /* Compute the determinant of the matrix
+     *
+     * det = sum_across(t0 * cof0);
+     *
+     * We perform a sum across the entire vector so that
+     * we don't have to splat the result when multiplying the
+     * cofactors by the inverse of the determinant.
+     */
+    det  = vec_madd(t0, cof0, vzero);
+    det0 = vec_splat(det, 0);
+    det1 = vec_splat(det, 1);
+    det2 = vec_splat(det, 2);
+    det3 = vec_splat(det, 3);
+    det  = vec_add(det0, det1);
+    det2 = vec_add(det2, det3);
+    det  = vec_add(det, det2);
+    /* Compute the reciprocal of the determinant.
+     */
+    invdet = recipf4(det);
+    /* Multiply the cofactors by the reciprocal of the determinant.
+     */ 
+    return Matrix4(
+        Vector4( vec_madd(cof0, invdet, vzero) ),
+        Vector4( vec_madd(cof1, invdet, vzero) ),
+        Vector4( vec_madd(cof2, invdet, vzero) ),
+        Vector4( vec_madd(cof3, invdet, vzero) )
+    );
+}
+
+// Inverse of a 4x4 treated as an affine transform: the xyz parts of the four
+// columns are copied into a Transform3, that transform is inverted, and the
+// result is promoted back to a Matrix4. The w components of mat are ignored.
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    Transform3 affinePart;
+    affinePart.setCol0( mat.getCol0().getXYZ( ) );
+    affinePart.setCol1( mat.getCol1().getXYZ( ) );
+    affinePart.setCol2( mat.getCol2().getXYZ( ) );
+    affinePart.setCol3( mat.getCol3().getXYZ( ) );
+    const Transform3 inverted = inverse( affinePart );
+    return Matrix4( inverted );
+}
+
+// Inverse via the Transform3 orthoInverse: the xyz parts of the four columns
+// are copied into a Transform3, inverted there, and promoted back to a
+// Matrix4. The w components of mat are ignored.
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    Transform3 affinePart;
+    affinePart.setCol0( mat.getCol0().getXYZ( ) );
+    affinePart.setCol1( mat.getCol1().getXYZ( ) );
+    affinePart.setCol2( mat.getCol2().getXYZ( ) );
+    affinePart.setCol3( mat.getCol3().getXYZ( ) );
+    const Transform3 inverted = orthoInverse( affinePart );
+    return Matrix4( inverted );
+}
+
+// Determinant of a 4x4 matrix: computes only the first cofactor vector (cof0)
+// using the same permuted layout as inverse(), then takes the 4-component dot
+// product of t0 and cof0.
+inline const floatInVec determinant( const Matrix4 & mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vector float in0, in1, in2, in3;
+    vector float tmp0, tmp1, tmp2, tmp3;
+    vector float cof0;
+    vector float t0, t1, t2, t3;
+    vector float t12, t23;
+    vector float t1r, t2r;
+    vector float t12r, t23r;
+    vector float t1r3, t1r3r;
+    /* Zero addend used wherever vec_madd / vec_nmsub acts as a plain multiply. */
+    vector float vzero = (vector float){0.0};
+    in0 = mat.getCol0().get128();
+    in1 = mat.getCol1().get128();
+    in2 = mat.getCol2().get128();
+    in3 = mat.getCol3().get128();
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
+    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
+    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
+    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
+    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
+    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
+    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
+    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
+    /* Generate the first cofactor vector only. Unlike inverse(), just
+     * cof0 is computed here because the determinant needs nothing else.
+     */
+    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
+    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
+    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
+    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
+    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
+    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
+    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
+    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
+    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
+    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
+    t1r = vec_sld(t1, t1, 8);			/* B F J N */
+    t2r = vec_sld(t2, t2, 8);			/* K O C G */
+    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
+    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
+    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
+    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
+    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
+    /* det = dot4(t0, cof0).
+     * NOTE(review): the second floatInVec argument (0) presumably picks the
+     * lane holding the dot product - confirm against floatInVec's constructor.
+     */
+    return floatInVec( _vmathVfDot4(t0,cof0), 0 );
+}
+
+// Column-wise sum of this matrix and mat.
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+// Column-wise difference of this matrix and mat.
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+// In-place addition: this = this + mat.
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    return *this = *this + mat;
+}
+
+// In-place subtraction: this = this - mat.
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    return *this = *this - mat;
+}
+
+// Return a matrix whose columns are the negated columns of this matrix.
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    const Vector4 negCol0 = -mCol0;
+    const Vector4 negCol1 = -mCol1;
+    const Vector4 negCol2 = -mCol2;
+    const Vector4 negCol3 = -mCol3;
+    return Matrix4( negCol0, negCol1, negCol2, negCol3 );
+}
+
+// Apply the Vector4 absPerElem to each column of mat and rebuild a matrix from the results.
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+// Multiply by a plain float: wrap it in a floatInVec and defer to that overload.
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    const floatInVec scalarVec( scalar );
+    return *this * scalarVec;
+}
+
+// Multiply every column of this matrix by scalar.
+inline const Matrix4 Matrix4::operator *( floatInVec scalar ) const
+{
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// In-place multiply by a plain float; forwards to the floatInVec overload.
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    *this *= floatInVec( scalar );
+    return *this;
+}
+
+// In-place multiply by scalar: this = this * scalar.
+inline Matrix4 & Matrix4::operator *=( floatInVec scalar )
+{
+    return *this = *this * scalar;
+}
+
+// scalar * matrix with a plain float on the left; forwards to the floatInVec overload.
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    const floatInVec scalarVec( scalar );
+    return scalarVec * mat;
+}
+
+// scalar * matrix: evaluated as matrix * scalar.
+inline const Matrix4 operator *( floatInVec scalar, const Matrix4 & mat )
+{
+    const Matrix4 scaled = mat * scalar;
+    return scaled;
+}
+
+// Matrix * 4-vector: two independent multiply-add chains
+// (col0*x + col2*z) and (col1*y + col3*w) that are summed at the end.
+inline const Vector4 Matrix4::operator *( Vector4 vec ) const
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 v = vec.get128();
+    vec_float4 sumXZ = vec_madd( mCol0.get128(), vec_splat( v, 0 ), zeros );
+    vec_float4 sumYW = vec_madd( mCol1.get128(), vec_splat( v, 1 ), zeros );
+    sumXZ = vec_madd( mCol2.get128(), vec_splat( v, 2 ), sumXZ );
+    sumYW = vec_madd( mCol3.get128(), vec_splat( v, 3 ), sumYW );
+    return Vector4( vec_add( sumXZ, sumYW ) );
+}
+
+// Matrix * 3-vector: accumulates col0*x + col1*y + col2*z; column 3 does not
+// contribute to the result.
+inline const Vector4 Matrix4::operator *( Vector3 vec ) const
+{
+    vec_float4 v = vec.get128();
+    // Start from a zero addend, then fold in one column per broadcast component.
+    vec_float4 acc = vec_madd( mCol0.get128(), vec_splat( v, 0 ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    acc = vec_madd( mCol1.get128(), vec_splat( v, 1 ), acc );
+    acc = vec_madd( mCol2.get128(), vec_splat( v, 2 ), acc );
+    return Vector4( acc );
+}
+
+// Matrix * point: (col0*x + col2*z) + (col3 + col1*y); column 3 is added
+// unscaled.
+inline const Vector4 Matrix4::operator *( Point3 pnt ) const
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 p = pnt.get128();
+    vec_float4 sumXZ = vec_madd( mCol0.get128(), vec_splat( p, 0 ), zeros );
+    vec_float4 sumYT = vec_madd( mCol1.get128(), vec_splat( p, 1 ), zeros );
+    sumXZ = vec_madd( mCol2.get128(), vec_splat( p, 2 ), sumXZ );
+    sumYT = vec_add( mCol3.get128(), sumYT );
+    return Vector4( vec_add( sumXZ, sumYT ) );
+}
+
+// Matrix * matrix: each result column is this matrix applied to the matching column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 out0 = *this * mat.mCol0;
+    const Vector4 out1 = *this * mat.mCol1;
+    const Vector4 out2 = *this * mat.mCol2;
+    const Vector4 out3 = *this * mat.mCol3;
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// In-place matrix product: this = this * mat.
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    return *this = *this * mat;
+}
+
+// Matrix4 * Transform3: columns 0-2 of tfrm go through the matrix*Vector3
+// overload; column 3 is wrapped in a Point3 so it goes through the
+// matrix*Point3 overload.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 out0 = *this * tfrm.getCol0();
+    const Vector4 out1 = *this * tfrm.getCol1();
+    const Vector4 out2 = *this * tfrm.getCol2();
+    const Vector4 out3 = *this * Point3( tfrm.getCol3() );
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// In-place product with a Transform3: this = this * tfrm.
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    return *this = *this * tfrm;
+}
+
+// Apply the Vector4 mulPerElem to each pair of matching columns of mat0 and mat1.
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Build a matrix whose columns are Vector4::xAxis(), yAxis(), zAxis() and wAxis().
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 axisX = Vector4::xAxis( );
+    const Vector4 axisY = Vector4::yAxis( );
+    const Vector4 axisZ = Vector4::zAxis( );
+    const Vector4 axisW = Vector4::wAxis( );
+    return Matrix4( axisX, axisY, axisZ, axisW );
+}
+
+// Overwrite the xyz part of columns 0-2 with the columns of mat3, via setXYZ.
+// Column 3 is not modified.
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    this->mCol0.setXYZ( mat3.getCol0() );
+    this->mCol1.setXYZ( mat3.getCol1() );
+    this->mCol2.setXYZ( mat3.getCol2() );
+    return *this;
+}
+
+// Extract a Matrix3 from the xyz parts of columns 0-2.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+// Overwrite the xyz part of column 3 with translateVec, via setXYZ.
+inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
+{
+    this->mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+// Return the xyz part of column 3.
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 translation = mCol3.getXYZ( );
+    return translation;
+}
+
+// Rotation about X from a plain float angle; forwards to the floatInVec overload.
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationX( angle );
+}
+
+// 4x4 rotation about X: columns 0 and 3 are the X and W axes; columns 1 and 2
+// are built by selecting cos / sin / -sin into the y and z lanes of a zero vector.
+inline const Matrix4 Matrix4::rotationX( floatInVec radians )
+{
+    vec_float4 sinv, cosv;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 yLane = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 zLane = _VECTORMATH_MASK_0x00F0;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    // col1: cos in y, sin in z
+    vec_float4 col1 = vec_sel( vec_sel( zeros, cosv, yLane ), sinv, zLane );
+    // col2: -sin in y, cos in z
+    vec_float4 col2 = vec_sel( vec_sel( zeros, negatef4(sinv), yLane ), cosv, zLane );
+    return Matrix4( Vector4::xAxis( ), Vector4( col1 ), Vector4( col2 ), Vector4::wAxis( ) );
+}
+
+// Rotation about Y from a plain float angle; forwards to the floatInVec overload.
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationY( angle );
+}
+
+// 4x4 rotation about Y: columns 1 and 3 are the Y and W axes; columns 0 and 2
+// are built by selecting cos / sin / -sin into the x and z lanes of a zero vector.
+inline const Matrix4 Matrix4::rotationY( floatInVec radians )
+{
+    vec_float4 sinv, cosv;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 xLane = _VECTORMATH_MASK_0xF000;
+    vec_uint4 zLane = _VECTORMATH_MASK_0x00F0;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    // col0: cos in x, -sin in z
+    vec_float4 col0 = vec_sel( vec_sel( zeros, cosv, xLane ), negatef4(sinv), zLane );
+    // col2: sin in x, cos in z
+    vec_float4 col2 = vec_sel( vec_sel( zeros, sinv, xLane ), cosv, zLane );
+    return Matrix4( Vector4( col0 ), Vector4::yAxis( ), Vector4( col2 ), Vector4::wAxis( ) );
+}
+
+// Rotation about Z from a plain float angle; forwards to the floatInVec overload.
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationZ( angle );
+}
+
+// 4x4 rotation about Z: columns 2 and 3 are the Z and W axes; columns 0 and 1
+// are built by selecting cos / sin / -sin into the x and y lanes of a zero vector.
+inline const Matrix4 Matrix4::rotationZ( floatInVec radians )
+{
+    vec_float4 sinv, cosv;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 xLane = _VECTORMATH_MASK_0xF000;
+    vec_uint4 yLane = _VECTORMATH_MASK_0x0F00;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    // col0: cos in x, sin in y
+    vec_float4 col0 = vec_sel( vec_sel( zeros, cosv, xLane ), sinv, yLane );
+    // col1: -sin in x, cos in y
+    vec_float4 col1 = vec_sel( vec_sel( zeros, negatef4(sinv), xLane ), cosv, yLane );
+    return Matrix4( Vector4( col0 ), Vector4( col1 ), Vector4::zAxis( ), Vector4::wAxis( ) );
+}
+
+// Build a 4x4 rotation matrix from three angles packed in radiansXYZ.
+// The sines and cosines of all angles are computed in one sincosf4 call and
+// then shuffled into per-axis factor vectors (X*, Y*, Z*) that are multiplied
+// together to form the first three columns; the fourth column is the W axis.
+// NOTE(review): the function name suggests the composition order Z * Y * X —
+// confirm against the library documentation.
+inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    // Pad the angles with a fourth component of 0.0f before taking sin/cos.
+    angles = Vector4( radiansXYZ, 0.0f ).get128();
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    // Interleave cos/sin (and -sin/cos) of the last two lanes.
+    // NOTE(review): presumably the z-angle and padding lanes — confirm vec_mergel lane order.
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+    // Clear the lane covered by the 0x000F mask in Z1.
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
+    // NOTE(review): the BBYX permute name suggests (b.y, b.y, a.y, a.x) — confirm against the macro definition.
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    // Broadcast lane 0 of sin and cos.
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    // Shared partial product reused by columns 1 and 2.
+    tmp = vec_madd( Z0, Y1, zero );
+    return Matrix4(
+        Vector4( vec_madd( Z0, Y0, zero ) ),
+        Vector4( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
+        Vector4( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) ),
+        Vector4::wAxis( )
+    );
+}
+
+// Axis-angle rotation from a plain float angle; forwards to the floatInVec overload.
+inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
+{
+    const floatInVec angle( radians );
+    return rotation( angle, unitVec );
+}
+
+// Build a 4x4 rotation matrix from an angle and an axis.
+// Each of the first three columns i is computed as
+// (axis * axis[i]) * (1 - cos) + tmp_i, where tmp_i holds cos on lane i and
+// +/- (axis * sin) components on the other lanes. The lane covered by the
+// 0x000F mask is cleared in axis and tmp_i beforehand, and the fourth column
+// is the W axis.
+// NOTE(review): the parameter name implies unitVec must be normalized; the
+// code does not normalize it — confirm with callers.
+inline const Matrix4 Matrix4::rotation( floatInVec radians, Vector3 unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c );
+    // Broadcast each axis component across a full vector.
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    // axis * sin and its negation feed the off-diagonal terms.
+    axisS = vec_madd( axis, s, zero );
+    negAxisS = negatef4( axisS );
+    // Gather the signed axis*sin terms for each column.
+    // NOTE(review): lane meaning is taken from the permute macro names — confirm against their definitions.
+    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+    // Overwrite one lane per column with cos, selected by the 0xF000 / 0x0F00 / 0x00F0 masks.
+    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
+    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
+    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
+    // Clear the 0x000F-masked lane of every operand that feeds the columns.
+    zeroW = (vec_float4)_VECTORMATH_MASK_0x000F;
+    axis = vec_andc( axis, zeroW );
+    tmp0 = vec_andc( tmp0, zeroW );
+    tmp1 = vec_andc( tmp1, zeroW );
+    tmp2 = vec_andc( tmp2, zeroW );
+    return Matrix4(
+        Vector4( vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 ) ),
+        Vector4( vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 ) ),
+        Vector4( vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 ) ),
+        Vector4::wAxis( )
+    );
+}
+
+// Rotation from a quaternion: build the Transform3 rotation, then promote it to 4x4.
+inline const Matrix4 Matrix4::rotation( Quat unitQuat )
+{
+    const Transform3 rot = Transform3::rotation( unitQuat );
+    return Matrix4( rot );
+}
+
+// 4x4 scale matrix: columns 0-2 each keep exactly one lane of scaleVec
+// (x, y, z in turn) and are zero elsewhere; column 3 is the W axis.
+inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 factors = scaleVec.get128();
+    vec_float4 diagX = vec_sel( zeros, factors, _VECTORMATH_MASK_0xF000 );
+    vec_float4 diagY = vec_sel( zeros, factors, _VECTORMATH_MASK_0x0F00 );
+    vec_float4 diagZ = vec_sel( zeros, factors, _VECTORMATH_MASK_0x00F0 );
+    return Matrix4( Vector4( diagX ), Vector4( diagY ), Vector4( diagZ ), Vector4::wAxis( ) );
+}
+
+inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
+{
+    /* columns 0..2 are multiplied by the x, y and z components of scaleVec; column 3 is passed through */
+    const Vector4 scaledX = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaledY = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaledZ = mat.getCol2() * scaleVec.getZ( );
+    const Vector4 lastCol = mat.getCol3();
+    return Matrix4( scaledX, scaledY, scaledZ, lastCol );
+}
+
+inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
+{
+    /* every column is multiplied per element by ( scaleVec, 1 ), */
+    /* so the fourth element of each column is multiplied by 1    */
+    const Vector4 factors = Vector4( scaleVec, 1.0f );
+    const Vector4 c0 = mulPerElem( mat.getCol0(), factors );
+    const Vector4 c1 = mulPerElem( mat.getCol1(), factors );
+    const Vector4 c2 = mulPerElem( mat.getCol2(), factors );
+    const Vector4 c3 = mulPerElem( mat.getCol3(), factors );
+    return Matrix4( c0, c1, c2, c3 );
+}
+
+inline const Matrix4 Matrix4::translation( Vector3 translateVec )
+{
+    /* the three axis columns followed by the translation extended with a fourth element of 1 */
+    const Vector4 offset( translateVec, 1.0f );
+    return Matrix4(
+        Vector4::xAxis( ), Vector4::yAxis( ), Vector4::zAxis( ),
+        offset
+    );
+}
+
+inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
+{
+    /* assemble the eye frame from three axes plus the eye position, then return its orthoInverse */
+    const Vector3 upHint = normalize( upVec );
+    const Vector3 backward = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 sideways = normalize( cross( upHint, backward ) );
+    /* the up axis is recomputed from the other two axes instead of reusing upHint */
+    const Vector3 upward = cross( backward, sideways );
+    const Matrix4 eyeFrame = Matrix4( Vector4( sideways ), Vector4( upward ), Vector4( backward ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    /* tan( pi/2 - fovy/2 ) equals cot( fovy/2 ); it scales y directly and x after division by aspect. */
+    /* Both depth terms share the factor 1 / ( zNear - zFar ).                                        */
+    /* The union gives scalar access to the individual lanes of a vector.                             */
+    union { vec_float4 v; float s[4]; } lanes;
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const float cotHalfFovy = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    const float invRange = 1.0f / ( zNear - zFar );
+    /* column 0: only element 0 is non-zero */
+    lanes.v = zero;
+    lanes.s[0] = cotHalfFovy / aspect;
+    const Vector4 col0( lanes.v );
+    /* column 1: only element 1 is non-zero */
+    lanes.v = zero;
+    lanes.s[1] = cotHalfFovy;
+    const Vector4 col1( lanes.v );
+    /* column 2: depth scale in element 2 and -1 in element 3 */
+    lanes.v = zero;
+    lanes.s[2] = ( zNear + zFar ) * invRange;
+    lanes.s[3] = -1.0f;
+    const Vector4 col2( lanes.v );
+    /* column 3: depth offset in element 2 */
+    lanes.v = zero;
+    lanes.s[2] = zNear * zFar * invRange * 2.0f;
+    const Vector4 col3( lanes.v );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) /* projection matrix from the six clip-plane values */
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn; /* packed ( left, bottom, zFar ) and ( right, top, zNear ) */
+    vec_float4 diff, sum, inv_diff;
+    vec_float4 diagonal, column, near2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    union { vec_float4 v; float s[4]; } l, f, r, n, b, t; /* scalar-to-vector staging; only element 0 of each is written below */
+    l.s[0] = left;
+    f.s[0] = zFar;
+    r.s[0] = right;
+    n.s[0] = zNear;
+    b.s[0] = bottom;
+    t.s[0] = top;
+    lbf = vec_mergeh( l.v, f.v ); /* interleave: ( left, zFar, ... ) */
+    rtn = vec_mergeh( r.v, n.v ); /* interleave: ( right, zNear, ... ) */
+    lbf = vec_mergeh( lbf, b.v ); /* ( left, bottom, zFar, - ); the last lane comes from an element that was never written */
+    rtn = vec_mergeh( rtn, t.v ); /* ( right, top, zNear, - ) */
+    diff = vec_sub( rtn, lbf ); /* ( right - left, top - bottom, zNear - zFar ) */
+    sum  = vec_add( rtn, lbf ); /* ( right + left, top + bottom, zNear + zFar ) */
+    inv_diff = recipf4( diff ); /* presumably the per-lane reciprocal of diff - confirm against recipf4 */
+    near2 = vec_splat( n.v, 0 );
+    near2 = vec_add( near2, near2 ); /* 2 * zNear in every lane */
+    diagonal = vec_madd( near2, inv_diff, zero ); /* 2 * zNear * inv_diff */
+    column = vec_madd( sum, inv_diff, zero ); /* sum * inv_diff */
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 ) ), /* one lane of diagonal, zero elsewhere */
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 ) ), /* the next lane of diagonal, zero elsewhere */
+        Vector4( vec_sel( column, ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}), _VECTORMATH_MASK_0x000F ) ), /* column with the 0x000F lane replaced by -1, which discards the unwritten lane */
+        Vector4( vec_sel( zero, vec_madd( diagonal, vec_splat( f.v, 0 ), zero ), _VECTORMATH_MASK_0x00F0 ) ) /* diagonal * zFar kept in the 0x00F0 lane only */
+    );
+}
+
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) /* projection matrix from the six clip-plane values */
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn; /* packed ( left, bottom, zFar ) and ( right, top, zNear ) */
+    vec_float4 diff, sum, inv_diff, neg_inv_diff;
+    vec_float4 diagonal, column;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    union { vec_float4 v; float s[4]; } l, f, r, n, b, t; /* scalar-to-vector staging; only element 0 of each is written below */
+    l.s[0] = left;
+    f.s[0] = zFar;
+    r.s[0] = right;
+    n.s[0] = zNear;
+    b.s[0] = bottom;
+    t.s[0] = top;
+    lbf = vec_mergeh( l.v, f.v ); /* interleave: ( left, zFar, ... ) */
+    rtn = vec_mergeh( r.v, n.v ); /* interleave: ( right, zNear, ... ) */
+    lbf = vec_mergeh( lbf, b.v ); /* ( left, bottom, zFar, - ); the last lane comes from an element that was never written */
+    rtn = vec_mergeh( rtn, t.v ); /* ( right, top, zNear, - ) */
+    diff = vec_sub( rtn, lbf ); /* ( right - left, top - bottom, zNear - zFar ) */
+    sum  = vec_add( rtn, lbf ); /* ( right + left, top + bottom, zNear + zFar ) */
+    inv_diff = recipf4( diff ); /* presumably the per-lane reciprocal of diff - confirm against recipf4 */
+    neg_inv_diff = negatef4( inv_diff );
+    diagonal = vec_add( inv_diff, inv_diff ); /* 2 * inv_diff */
+    column = vec_madd( sum, vec_sel( neg_inv_diff, inv_diff, _VECTORMATH_MASK_0x00F0 ), zero ); /* sum * -inv_diff, except the 0x00F0 lane which uses +inv_diff */
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 ) ), /* one lane of diagonal per column, zero elsewhere */
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 ) ),
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x00F0 ) ),
+        Vector4( vec_sel( column, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), _VECTORMATH_MASK_0x000F ) ) /* column with the 0x000F lane replaced by 1, which discards the unwritten lane */
+    );
+}
+
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    /* column-wise: each column is produced by the Vector4 select with the same bool selector */
+    const Vector4 c0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 c1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 c2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 c3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( c0, c1, c2, c3 );
+}
+
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, boolInVec select1 )
+{
+    /* column-wise: each column is produced by the Vector4 select with the same boolInVec selector */
+    const Vector4 c0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 c1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 c2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 c3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( c0, c1, c2, c3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row ) /* rows 0..3 in order, one print call per row */
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf("%s:\n", name); /* label line: the name followed by a colon */
+    print( mat ); /* then the matrix itself via the unlabelled overload */
+}
+
+#endif
+
+inline Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 ), mCol1( tfrm.mCol1 ),
+      mCol2( tfrm.mCol2 ), mCol3( tfrm.mCol3 )
+{
+    /* all four columns are copied in the initializer list, */
+    /* so there is nothing left to do in the body           */
+}
+
+inline Transform3::Transform3( float scalar )
+    : mCol0( scalar ), mCol1( scalar ),
+      mCol2( scalar ), mCol3( scalar )
+{
+    /* every column is built by the Vector3( float ) constructor */
+    /* from the same scalar, in the initializer list             */
+}
+
+inline Transform3::Transform3( floatInVec scalar )
+    : mCol0( scalar ), mCol1( scalar ),
+      mCol2( scalar ), mCol3( scalar )
+{
+    /* every column is built by the Vector3( floatInVec ) constructor */
+    /* from the same scalar, in the initializer list                  */
+}
+
+inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
+    : mCol0( _col0 ), mCol1( _col1 ),
+      mCol2( _col2 ), mCol3( _col3 )
+{
+    /* the four arguments become columns 0..3 unchanged; */
+    /* the copies happen in the initializer list         */
+}
+
+inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
+{
+    /* columns 0..2 come from the 3x3 matrix, column 3 from translateVec */
+    setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
+{
+    /* columns 0..2 come from Matrix3( unitQuat ), column 3 from translateVec */
+    setUpper3x3( Matrix3( unitQuat ) ).setTranslation( translateVec );
+}
+
+inline Transform3 & Transform3::setCol0( Vector3 _col0 )
+{
+    this->mCol0 = _col0; /* only column 0 changes */
+    return *this; /* returning the object lets calls be chained */
+}
+
+inline Transform3 & Transform3::setCol1( Vector3 _col1 )
+{
+    this->mCol1 = _col1; /* only column 1 changes */
+    return *this; /* returning the object lets calls be chained */
+}
+
+inline Transform3 & Transform3::setCol2( Vector3 _col2 )
+{
+    this->mCol2 = _col2; /* only column 2 changes */
+    return *this; /* returning the object lets calls be chained */
+}
+
+inline Transform3 & Transform3::setCol3( Vector3 _col3 )
+{
+    this->mCol3 = _col3; /* only column 3 changes */
+    return *this; /* returning the object lets calls be chained */
+}
+
+inline Transform3 & Transform3::setCol( int col, Vector3 vec )
+{
+    (&mCol0)[col] = vec; /* columns are addressed as an array that starts at mCol0; col is not range-checked */
+    return *this;
+}
+
+inline Transform3 & Transform3::setRow( int row, Vector4 vec )
+{
+    for ( int col = 0; col < 4; ++col ) /* element `col` of vec is stored in row `row` of column `col` */
+    {
+        (&mCol0)[col].setElem( row, vec.getElem( col ) );
+    }
+    return *this;
+}
+
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    (&mCol0)[col].setElem( row, val ); /* write straight into the addressed column; indices are not range-checked */
+    return *this;
+}
+
+inline Transform3 & Transform3::setElem( int col, int row, floatInVec val )
+{
+    /* read the column, change one element, write the column back */
+    Vector3 column = this->getCol( col );
+    column.setElem( row, val );
+    this->setCol( col, column );
+    return *this;
+}
+
+inline const floatInVec Transform3::getElem( int col, int row ) const
+{
+    return (&mCol0)[col].getElem( row ); /* element `row` of column `col`; neither index is range-checked */
+}
+
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0; /* returned by value */
+}
+
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1; /* returned by value */
+}
+
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2; /* returned by value */
+}
+
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3; /* returned by value */
+}
+
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    return (&mCol0)[col]; /* columns are addressed as an array that starts at mCol0; col is not range-checked */
+}
+
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); /* element `row` of each of the four columns, in column order */
+}
+
+inline Vector3 & Transform3::operator []( int col )
+{
+    return (&mCol0)[col]; /* mutable reference to the column; col is not range-checked */
+}
+
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    return (&mCol0)[col]; /* copy of the column; col is not range-checked */
+}
+
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    /* copy the four columns one by one, treating them as an array */
+    /* that starts at mCol0 (the same addressing getCol uses)      */
+    for ( int col = 0; col < 4; ++col )
+        (&mCol0)[col] = (&tfrm.mCol0)[col];
+    return *this;
+}
+
+inline const Transform3 inverse( const Transform3 & tfrm ) /* general inverse; there is no check for a zero determinant */
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() ); /* presumably col0 x col1 - confirm against _vmathVfCross */
+    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() ); /* col1 x col2 */
+    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() ); /* col2 x col0 */
+    inv3 = negatef4( tfrm.getCol3().get128() ); /* negated translation */
+    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() ); /* ( col0 x col1 ) . col2: the determinant of the upper 3x3, if the helpers are the usual cross and 3-component dot */
+    dot = vec_splat( dot, 0 ); /* broadcast lane 0 to all lanes */
+    invdet = recipf4( dot ); /* presumably 1 / determinant - confirm against recipf4 */
+    tmp3 = vec_mergeh( tmp0, tmp2 ); /* the merges and permutes below regroup the three cross products lane by lane */
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    xxxx = vec_splat( inv3, 0 ); /* -translation.x in every lane */
+    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( inv3, 1 ); /* -translation.y in every lane */
+    zzzz = vec_splat( inv3, 2 ); /* -translation.z in every lane */
+    inv3 = vec_madd( inv0, xxxx, zero ); /* inv3 = inv0 * -t.x + inv1 * -t.y + inv2 * -t.z, before the determinant scaling */
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    inv0 = vec_madd( inv0, invdet, zero ); /* all four columns are scaled by invdet */
+    inv1 = vec_madd( inv1, invdet, zero );
+    inv2 = vec_madd( inv2, invdet, zero );
+    inv3 = vec_madd( inv3, invdet, zero );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    /* no determinant or division is involved: columns 0..2 are only regrouped, the translation is recombined */
+    const vec_float4 c0 = tfrm.getCol0().get128();
+    const vec_float4 c1 = tfrm.getCol1().get128();
+    const vec_float4 c2 = tfrm.getCol2().get128();
+    const vec_float4 negT = negatef4( tfrm.getCol3().get128() );
+    const vec_float4 hi = vec_mergeh( c0, c2 );
+    const vec_float4 lo = vec_mergel( c0, c2 );
+    /* going by the permute names, row0..row2 hold the rows of the upper 3x3 (its transpose, column by column) */
+    const vec_float4 row0 = vec_mergeh( hi, c1 );
+    const vec_float4 row1 = vec_perm( hi, c1, _VECTORMATH_PERM_ZBWX );
+    const vec_float4 row2 = vec_perm( lo, c1, _VECTORMATH_PERM_XCYX );
+    /* new translation = row0 * -t.x + row1 * -t.y + row2 * -t.z */
+    vec_float4 trans = vec_madd( row0, vec_splat( negT, 0 ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    trans = vec_madd( row1, vec_splat( negT, 1 ), trans );
+    trans = vec_madd( row2, vec_splat( negT, 2 ), trans );
+    return Transform3(
+        Vector3( row0 ),
+        Vector3( row1 ),
+        Vector3( row2 ),
+        Vector3( trans )
+    );
+}
+
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    /* column-wise: applies the Vector3 absPerElem to each of the four columns */
+    const Vector3 a0 = absPerElem( tfrm.getCol0() );
+    const Vector3 a1 = absPerElem( tfrm.getCol1() );
+    const Vector3 a2 = absPerElem( tfrm.getCol2() );
+    const Vector3 a3 = absPerElem( tfrm.getCol3() );
+    return Transform3( a0, a1, a2, a3 );
+}
+
+inline const Vector3 Transform3::operator *( Vector3 vec ) const
+{
+    /* Transforms a vector: mCol0 * x + mCol1 * y + mCol2 * z.  */
+    /* mCol3 does not take part, so no translation is applied. */
+    const vec_float4 v = vec.get128();
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* one multiply-add per component, accumulated in order x, y, z */
+    vec_float4 acc = vec_madd( mCol0.get128(), vec_splat( v, 0 ), zero );
+    acc = vec_madd( mCol1.get128(), vec_splat( v, 1 ), acc );
+    acc = vec_madd( mCol2.get128(), vec_splat( v, 2 ), acc );
+    return Vector3( acc );
+}
+
+inline const Point3 Transform3::operator *( Point3 pnt ) const
+{
+    /* Transforms a point: mCol0 * x + mCol1 * y + mCol2 * z + mCol3.      */
+    /* The sum is built as two independent partial sums that are added   */
+    /* at the end: ( mCol2 * z + mCol0 * x ) + ( mCol3 + mCol1 * y ).      */
+    const vec_float4 p = pnt.get128();
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 partXZ = vec_madd( mCol0.get128(), vec_splat( p, 0 ), zero );
+    vec_float4 partYT = vec_madd( mCol1.get128(), vec_splat( p, 1 ), zero );
+    partXZ = vec_madd( mCol2.get128(), vec_splat( p, 2 ), partXZ );
+    partYT = vec_add( mCol3.get128(), partYT );
+    const vec_float4 total = vec_add( partXZ, partYT );
+    return Point3( total );
+}
+
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    /* columns 0..2 of tfrm go through the Vector3 overload, column 3 through the Point3 overload so that mCol3 is added */
+    const Vector3 r0 = *this * tfrm.mCol0;
+    const Vector3 r1 = *this * tfrm.mCol1;
+    const Vector3 r2 = *this * tfrm.mCol2;
+    const Vector3 r3 = Vector3( *this * Point3( tfrm.mCol3 ) );
+    return Transform3( r0, r1, r2, r3 );
+}
+
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    /* the product is a temporary, fully evaluated before the assignment overwrites *this */
+    return ( *this = *this * tfrm );
+}
+
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    /* column-wise: applies the Vector3 mulPerElem to matching columns of the two transforms */
+    const Vector3 p0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 p1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 p2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 p3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( p0, p1, p2, p3 );
+}
+
+inline const Transform3 Transform3::identity( )
+{
+    /* the three axis columns plus a translation column built from 0.0f */
+    const Vector3 noTranslation( 0.0f );
+    return Transform3(
+        Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ),
+        noTranslation
+    );
+}
+
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    this->mCol0 = tfrm.getCol0(); /* columns 0..2 are replaced; mCol3 is left as it is */
+    this->mCol1 = tfrm.getCol1();
+    this->mCol2 = tfrm.getCol2();
+    return *this;
+}
+
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    return Matrix3( this->mCol0, this->mCol1, this->mCol2 ); /* columns 0..2 only; mCol3 is not included */
+}
+
+inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
+{
+    this->mCol3 = translateVec; /* the translation is stored in column 3 */
+    return *this;
+}
+
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->mCol3; /* the translation is stored in column 3 */
+}
+
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    return Transform3::rotationX( floatInVec( radians ) ); /* wrap the scalar angle and reuse the floatInVec overload */
+}
+
+inline const Transform3 Transform3::rotationX( floatInVec radians )
+{
+    /* columns: xAxis, ( 0, cos, sin ), ( 0, -sin, cos ), */
+    /* followed by a translation built from 0.0f          */
+    const vec_uint4 yLane = _VECTORMATH_MASK_0x0F00;
+    const vec_uint4 zLane = _VECTORMATH_MASK_0x00F0;
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians.get128(), &sinA, &cosA );
+    /* column 1: cos in the y lane, sin in the z lane */
+    const vec_float4 col1 = vec_sel( vec_sel( zero, cosA, yLane ), sinA, zLane );
+    /* column 2: -sin in the y lane, cos in the z lane */
+    const vec_float4 col2 = vec_sel( vec_sel( zero, negatef4( sinA ), yLane ), cosA, zLane );
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3( col1 ),
+        Vector3( col2 ),
+        Vector3( 0.0f )
+    );
+}
+
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    return Transform3::rotationY( floatInVec( radians ) ); /* wrap the scalar angle and reuse the floatInVec overload */
+}
+
+inline const Transform3 Transform3::rotationY( floatInVec radians )
+{
+    /* columns: ( cos, 0, -sin ), yAxis, ( sin, 0, cos ), */
+    /* followed by a translation built from 0.0f          */
+    const vec_uint4 xLane = _VECTORMATH_MASK_0xF000;
+    const vec_uint4 zLane = _VECTORMATH_MASK_0x00F0;
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians.get128(), &sinA, &cosA );
+    /* column 0: cos in the x lane, -sin in the z lane */
+    const vec_float4 col0 = vec_sel( vec_sel( zero, cosA, xLane ), negatef4( sinA ), zLane );
+    /* column 2: sin in the x lane, cos in the z lane */
+    const vec_float4 col2 = vec_sel( vec_sel( zero, sinA, xLane ), cosA, zLane );
+    return Transform3(
+        Vector3( col0 ),
+        Vector3::yAxis( ),
+        Vector3( col2 ),
+        Vector3( 0.0f )
+    );
+}
+
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    return Transform3::rotationZ( floatInVec( radians ) ); /* wrap the scalar angle and reuse the floatInVec overload */
+}
+
+inline const Transform3 Transform3::rotationZ( floatInVec radians )
+{
+    /* columns: ( cos, sin, 0 ), ( -sin, cos, 0 ), zAxis, */
+    /* followed by a translation built from 0.0f          */
+    const vec_uint4 xLane = _VECTORMATH_MASK_0xF000;
+    const vec_uint4 yLane = _VECTORMATH_MASK_0x0F00;
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians.get128(), &sinA, &cosA );
+    /* column 0: cos in the x lane, sin in the y lane */
+    const vec_float4 col0 = vec_sel( vec_sel( zero, cosA, xLane ), sinA, yLane );
+    /* column 1: -sin in the x lane, cos in the y lane */
+    const vec_float4 col1 = vec_sel( vec_sel( zero, negatef4( sinA ), xLane ), cosA, yLane );
+    return Transform3(
+        Vector3( col0 ),
+        Vector3( col1 ),
+        Vector3::zAxis( ),
+        Vector3( 0.0f )
+    );
+}
+
+inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ ) /* rotation built from the three per-axis angles in radiansXYZ */
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    angles = Vector4( radiansXYZ, 0.0f ).get128(); /* the three angles plus a fourth element of 0 */
+    sincosf4( angles, &s, &c ); /* presumably per-lane sine and cosine - confirm against sincosf4 */
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s ); /* low halves interleaved: ( c.z, s.z, c.w, s.w ) */
+    Z1 = vec_mergel( negS, c ); /* ( -s.z, c.z, -s.w, c.w ) */
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F ); /* clear the 0x000F lane of Z1 */
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX ); /* lane order is set by the permute constant; NOTE(review): presumably the y-angle terms - confirm */
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    X0 = vec_splat( s, 0 ); /* sine of the x angle in every lane */
+    X1 = vec_splat( c, 0 ); /* cosine of the x angle in every lane */
+    tmp = vec_madd( Z0, Y1, zero ); /* shared product used by columns 1 and 2 */
+    return Transform3(
+        Vector3( vec_madd( Z0, Y0, zero ) ), /* column 0 = Z0 * Y0 */
+        Vector3( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ), /* column 1 = Z1 * X1 + tmp * X0 */
+        Vector3( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) ), /* column 2 = tmp * X1 - Z1 * X0 */
+        Vector3( 0.0f ) /* translation column built from 0.0f */
+    );
+}
+
+inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
+{
+    return Transform3::rotation( floatInVec( radians ), unitVec ); /* wrap the scalar angle and reuse the floatInVec overload */
+}
+
+inline const Transform3 Transform3::rotation( floatInVec radians, Vector3 unitVec )
+{
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) ); /* upper 3x3 from Matrix3::rotation, translation built from 0.0f */
+}
+
+inline const Transform3 Transform3::rotation( Quat unitQuat )
+{
+    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) ); /* upper 3x3 from the Matrix3( Quat ) constructor, translation built from 0.0f */
+}
+
+inline const Transform3 Transform3::scale( Vector3 scaleVec )
+{
+    /* each of the first three columns takes one masked lane of scaleVec and zero elsewhere; the translation is built from 0.0f */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 factors = scaleVec.get128();
+    const Vector3 xCol( vec_sel( zero, factors, _VECTORMATH_MASK_0xF000 ) );
+    const Vector3 yCol( vec_sel( zero, factors, _VECTORMATH_MASK_0x0F00 ) );
+    const Vector3 zCol( vec_sel( zero, factors, _VECTORMATH_MASK_0x00F0 ) );
+    return Transform3( xCol, yCol, zCol, Vector3( 0.0f ) );
+}
+
+inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
+{
+    /* columns 0..2 are multiplied by the x, y and z components of scaleVec; column 3 is passed through */
+    const Vector3 scaledX = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 scaledY = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 scaledZ = tfrm.getCol2() * scaleVec.getZ( );
+    const Vector3 lastCol = tfrm.getCol3();
+    return Transform3( scaledX, scaledY, scaledZ, lastCol );
+}
+
+inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
+{
+    /* every column, the translation included, is multiplied per element by scaleVec */
+    const Vector3 c0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 c1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 c2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 c3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+inline const Transform3 Transform3::translation( Vector3 translateVec )
+{
+    /* the three axis columns make up the upper 3x3; */
+    /* translateVec is used unchanged as column 3   */
+    return Transform3(
+        Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ),
+        translateVec
+    );
+}
+
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    /* column-wise: each column is produced by the Vector3 select with the same bool selector */
+    const Vector3 c0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 c1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 c2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 c3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, boolInVec select1 )
+{
+    /* column-wise: each column is produced by the Vector3 select with the same boolInVec selector */
+    const Vector3 c0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 c1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 c2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 c3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Transform3 & tfrm )
+{
+    /* rows 0..2 in order; each row is the Vector4 returned by getRow */
+    for ( int row = 0; row < 3; ++row )
+        print( tfrm.getRow( row ) );
+}
+
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    printf("%s:\n", name); /* label line: the name followed by a colon */
+    print( tfrm ); /* then the transform itself via the unlabelled overload */
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm ) /* branch-free conversion: all four candidate quaternions are computed, then one is selected */
+{
+    vec_float4 res;
+    vec_float4 col0, col1, col2;
+    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
+    vec_float4 radicand, invSqrt, scale;
+    vec_float4 res0, res1, res2, res3;
+    vec_float4 xx, yy, zz;
+    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
+    vec_uint4 select_y = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
+    vec_uint4 select_w = _VECTORMATH_MASK_0x000F;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    col0 = tfrm.getCol0().get128();
+    col1 = tfrm.getCol1().get128();
+    col2 = tfrm.getCol2().get128();
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    xx_yy = vec_sel( col0, col1, select_y ); /* col0 with its y lane taken from col1 */
+    xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX ); /* three rotations of the diagonal elements, going by the variable names */
+    yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
+    zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
+
+    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); /* the same three-term sum in every lane */
+    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); /* one diagonal element minus the other two, per lane */
+    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); /* diagDiff + 1 in x, y, z and diagSum + 1 in w */
+    invSqrt = rsqrtf4( radicand ); /* presumably the per-lane reciprocal square root - confirm against rsqrtf4 */
+
+    zy_xz_yx = vec_sel( col0, col1, select_z ); /* gather off-diagonal elements; lane order is set by the permute constants */
+    zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
+    yz_zx_xy = vec_sel( col0, col1, select_x );
+    yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
+
+    sum = vec_add( zy_xz_yx, yz_zx_xy );
+    diff = vec_sub( zy_xz_yx, yz_zx_xy );
+
+    scale = vec_madd( invSqrt, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ); /* 0.5 * invSqrt per lane */
+    res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA ); /* candidate for each case, assembled from sum and diff */
+    res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
+    res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
+    res3 = diff;
+    res0 = vec_sel( res0, radicand, select_x ); /* each candidate takes radicand in its own lane */
+    res1 = vec_sel( res1, radicand, select_y );
+    res2 = vec_sel( res2, radicand, select_z );
+    res3 = vec_sel( res3, radicand, select_w );
+    res0 = vec_madd( res0, vec_splat( scale, 0 ), zero ); /* and is scaled by the matching lane of scale */
+    res1 = vec_madd( res1, vec_splat( scale, 1 ), zero );
+    res2 = vec_madd( res2, vec_splat( scale, 2 ), zero );
+    res3 = vec_madd( res3, vec_splat( scale, 3 ), zero );
+
+    /* determine case and select answer */
+
+    xx = vec_splat( col0, 0 );
+    yy = vec_splat( col1, 1 );
+    zz = vec_splat( col2, 2 );
+    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) ); /* res1 when yy > xx, otherwise res0 */
+    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) ); /* res2 when zz exceeds both xx and yy */
+    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), zero ) ); /* res3 overrides the rest when lane 0 of diagSum is positive */
+    mVec128 = res;
+}
+
+inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 )
+{
+    /* column i of the result is tfrm0 multiplied by component i of tfrm1 */
+    const Vector3 c0 = tfrm0 * tfrm1.getX( );
+    const Vector3 c1 = tfrm0 * tfrm1.getY( );
+    const Vector3 c2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( c0, c1, c2 );
+}
+
+inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 )
+{
+    /* column i of the result is tfrm0 multiplied by component i of tfrm1 */
+    const Vector4 c0 = tfrm0 * tfrm1.getX( );
+    const Vector4 c1 = tfrm0 * tfrm1.getY( );
+    const Vector4 c2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 c3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( c0, c1, c2, c3 );
+}
+
+inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat )
+{
+    /* result = row0 * vec.x + row1 * vec.y + row2 * vec.z; going by the permute names, row0..row2 are the rows of mat */
+    const vec_float4 v = vec.get128();
+    const vec_float4 c0 = mat.getCol0().get128();
+    const vec_float4 c1 = mat.getCol1().get128();
+    const vec_float4 c2 = mat.getCol2().get128();
+    const vec_float4 hi = vec_mergeh( c0, c2 );
+    const vec_float4 lo = vec_mergel( c0, c2 );
+    const vec_float4 row0 = vec_mergeh( hi, c1 );
+    const vec_float4 row1 = vec_perm( hi, c1, _VECTORMATH_PERM_ZBWX );
+    const vec_float4 row2 = vec_perm( lo, c1, _VECTORMATH_PERM_XCYX );
+    vec_float4 acc = vec_madd( row0, vec_splat( v, 0 ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    acc = vec_madd( row1, vec_splat( v, 1 ), acc );
+    acc = vec_madd( row2, vec_splat( v, 2 ), acc );
+    return Vector3( acc );
+}
+
+inline const Matrix3 crossMatrix( Vector3 vec )
+{
+    /* each column is a permutation of the elements of vec and of -vec, */
+    /* with one lane (a different one per column) cleared by vec_andc   */
+    const vec_float4 v = vec.get128();
+    const vec_float4 negV = negatef4( v );
+    const vec_float4 col0 = vec_andc( vec_perm( v, negV, _VECTORMATH_PERM_XZBX ), (vec_float4)_VECTORMATH_MASK_0xF000 );
+    const vec_float4 col1 = vec_andc( vec_perm( v, negV, _VECTORMATH_PERM_CXXX ), (vec_float4)_VECTORMATH_MASK_0x0F00 );
+    const vec_float4 col2 = vec_andc( vec_perm( v, negV, _VECTORMATH_PERM_YAXX ), (vec_float4)_VECTORMATH_MASK_0x00F0 );
+    return Matrix3(
+        Vector3( col0 ),
+        Vector3( col1 ),
+        Vector3( col2 )
+    );
+}
+
+inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat )
+{
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); /* column i of the result is cross( vec, column i of mat ) */
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_soa.h
index 8c5d8319a..7868cfd4b 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_soa.h
@@ -1,1744 +1,1744 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_CPP_H
-#define _VECTORMATH_MAT_SOA_CPP_H
-
-namespace Vectormath {
-namespace Soa {
-
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( vec_float4 scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const Quat & unitQuat )
-{
-    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat.getX();
-    qy = unitQuat.getY();
-    qz = unitQuat.getZ();
-    qw = unitQuat.getW();
-    qx2 = vec_add( qx, qx );
-    qy2 = vec_add( qy, qy );
-    qz2 = vec_add( qz, qz );
-    qxqx2 = vec_madd( qx, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqy2 = vec_madd( qx, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqz2 = vec_madd( qx, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqw2 = vec_madd( qw, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqy2 = vec_madd( qy, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqz2 = vec_madd( qy, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqw2 = vec_madd( qw, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qzqz2 = vec_madd( qz, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qzqw2 = vec_madd( qw, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol0 = Vector3( vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qyqy2 ), qzqz2 ), vec_add( qxqy2, qzqw2 ), vec_sub( qxqz2, qyqw2 ) );
-    mCol1 = Vector3( vec_sub( qxqy2, qzqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qzqz2 ), vec_add( qyqz2, qxqw2 ) );
-    mCol2 = Vector3( vec_add( qxqz2, qyqw2 ), vec_sub( qyqz2, qxqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qyqy2 ) );
-}
-
-inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3::Matrix3( const Aos::Matrix3 & mat )
-{
-    mCol0 = Vector3( mat.getCol0() );
-    mCol1 = Vector3( mat.getCol1() );
-    mCol2 = Vector3( mat.getCol2() );
-}
-
-inline Matrix3::Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 )
-{
-    mCol0 = Vector3( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
-    mCol1 = Vector3( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
-    mCol2 = Vector3( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
-}
-
-inline void Matrix3::get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const
-{
-    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol0( tmpV3_0 );
-    result1.setCol0( tmpV3_1 );
-    result2.setCol0( tmpV3_2 );
-    result3.setCol0( tmpV3_3 );
-    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol1( tmpV3_0 );
-    result1.setCol1( tmpV3_1 );
-    result2.setCol1( tmpV3_2 );
-    result3.setCol1( tmpV3_3 );
-    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol2( tmpV3_0 );
-    result1.setCol2( tmpV3_1 );
-    result2.setCol2( tmpV3_2 );
-    result3.setCol2( tmpV3_3 );
-}
-
-inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, vec_float4 val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    return Matrix3(
-        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
-        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
-        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    Vector3 tmp0, tmp1, tmp2;
-    vec_float4 detinv;
-    tmp0 = cross( mat.getCol1(), mat.getCol2() );
-    tmp1 = cross( mat.getCol2(), mat.getCol0() );
-    tmp2 = cross( mat.getCol0(), mat.getCol1() );
-    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( mat.getCol2(), tmp2 ) );
-    return Matrix3(
-        Vector3( vec_madd( tmp0.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( vec_madd( tmp0.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( vec_madd( tmp0.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline vec_float4 determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( vec_float4 scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) ),
-        Vector3::yAxis( ),
-        Vector3( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    return Matrix3(
-        Vector3( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) ),
-        Vector3( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    return Matrix3(
-        Vector3( vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ),
-        Vector3( vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ),
-        Vector3( vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
-{
-    return Matrix3(
-        Vector3( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ() )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    Aos::Matrix3 mat0, mat1, mat2, mat3;
-    mat.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( vec_float4 scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol3 = Vector4( mat.getCol3(), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol3 = Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol3 = Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline Matrix4::Matrix4( const Aos::Matrix4 & mat )
-{
-    mCol0 = Vector4( mat.getCol0() );
-    mCol1 = Vector4( mat.getCol1() );
-    mCol2 = Vector4( mat.getCol2() );
-    mCol3 = Vector4( mat.getCol3() );
-}
-
-inline Matrix4::Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 )
-{
-    mCol0 = Vector4( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
-    mCol1 = Vector4( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
-    mCol2 = Vector4( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
-    mCol3 = Vector4( mat0.getCol3(), mat1.getCol3(), mat2.getCol3(), mat3.getCol3() );
-}
-
-inline void Matrix4::get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const
-{
-    Aos::Vector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    mCol0.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol0( tmpV4_0 );
-    result1.setCol0( tmpV4_1 );
-    result2.setCol0( tmpV4_2 );
-    result3.setCol0( tmpV4_3 );
-    mCol1.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol1( tmpV4_0 );
-    result1.setCol1( tmpV4_1 );
-    result2.setCol1( tmpV4_2 );
-    result3.setCol1( tmpV4_3 );
-    mCol2.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol2( tmpV4_0 );
-    result1.setCol2( tmpV4_1 );
-    result2.setCol2( tmpV4_2 );
-    result3.setCol2( tmpV4_3 );
-    mCol3.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol3( tmpV4_0 );
-    result1.setCol3( tmpV4_1 );
-    result2.setCol3( tmpV4_2 );
-    result3.setCol3( tmpV4_3 );
-}
-
-inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, vec_float4 val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    return Matrix4(
-        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
-        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
-        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
-        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    Vector4 res0, res1, res2, res3;
-    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res0.setX( vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    res0.setY( vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    res0.setZ( vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    res0.setW( vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    detInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_add( vec_add( vec_add( vec_madd( mA, res0.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, res0.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, res0.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, res0.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    res1.setX( vec_madd( mI, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res1.setY( vec_madd( mM, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res1.setZ( vec_madd( mA, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res1.setW( vec_madd( mE, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res3.setX( vec_madd( mI, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res3.setY( vec_madd( mM, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res3.setZ( vec_madd( mA, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res3.setW( vec_madd( mE, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setX( vec_madd( mI, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setY( vec_madd( mM, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setZ( vec_madd( mA, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setW( vec_madd( mE, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp0 = vec_sub( vec_madd( mI, mB, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mJ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mM, mF, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mN, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mI, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mM, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mI, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mM, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setX( vec_add( vec_sub( vec_madd( mL, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getX() ) );
-    res2.setY( vec_add( vec_sub( vec_madd( mP, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getY() ) );
-    res2.setZ( vec_sub( vec_sub( vec_madd( mB, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mD, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getZ() ) );
-    res2.setW( vec_sub( vec_sub( vec_madd( mF, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mH, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getW() ) );
-    res3.setX( vec_add( vec_sub( vec_madd( mJ, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mK, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getX() ) );
-    res3.setY( vec_add( vec_sub( vec_madd( mN, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mO, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getY() ) );
-    res3.setZ( vec_sub( vec_sub( vec_madd( mC, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getZ() ) );
-    res3.setW( vec_sub( vec_sub( vec_madd( mG, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getW() ) );
-    res1.setX( vec_sub( vec_sub( vec_madd( mK, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getX() ) );
-    res1.setY( vec_sub( vec_sub( vec_madd( mO, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getY() ) );
-    res1.setZ( vec_add( vec_sub( vec_madd( mD, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getZ() ) );
-    res1.setW( vec_add( vec_sub( vec_madd( mH, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getW() ) );
-    return Matrix4(
-        ( res0 * detInv ),
-        ( res1 * detInv ),
-        ( res2 * detInv ),
-        ( res3 * detInv )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline vec_float4 determinant( const Matrix4 & mat )
-{
-    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dx = vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dy = vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dz = vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dw = vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return vec_add( vec_add( vec_add( vec_madd( mA, dx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, dy, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, dz, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, dw, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( vec_float4 scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
-{
-    return Vector4(
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getX(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getY(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getZ(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getW(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
-{
-    return Vector4(
-        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
-{
-    return Vector4(
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getX() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getY() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getZ() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getW(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getW() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::yAxis( ),
-        Vector4( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    return Matrix4(
-        Vector4( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    return Matrix4(
-        Vector4( vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
-{
-    return Matrix4(
-        Vector4( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 f, rangeInv;
-    f = tanf4( vec_sub( ((vec_float4){_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2}), vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), fovyRadians, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    rangeInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    return Matrix4(
-        Vector4( divf4( f, aspect ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), f, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_add( zNear, zFar ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( vec_madd( zNear, zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = vec_add( right, left );
-    sum_tb = vec_add( top, bottom );
-    sum_nf = vec_add( zNear, zFar );
-    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );
-    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );
-    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    n2 = vec_add( zNear, zNear );
-    return Matrix4(
-        Vector4( vec_madd( n2, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( n2, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_madd( sum_rl, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_tb, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( n2, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = vec_add( right, left );
-    sum_tb = vec_add( top, bottom );
-    sum_nf = vec_add( zNear, zFar );
-    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );
-    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );
-    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    return Matrix4(
-        Vector4( vec_add( inv_rl, inv_rl ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_tb, inv_tb ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_nf, inv_nf ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_madd( negatef4( sum_rl ), inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( negatef4( sum_tb ), inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    Aos::Matrix4 mat0, mat1, mat2, mat3;
-    mat.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( vec_float4 scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Aos::Transform3 & tfrm )
-{
-    mCol0 = Vector3( tfrm.getCol0() );
-    mCol1 = Vector3( tfrm.getCol1() );
-    mCol2 = Vector3( tfrm.getCol2() );
-    mCol3 = Vector3( tfrm.getCol3() );
-}
-
-inline Transform3::Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 )
-{
-    mCol0 = Vector3( tfrm0.getCol0(), tfrm1.getCol0(), tfrm2.getCol0(), tfrm3.getCol0() );
-    mCol1 = Vector3( tfrm0.getCol1(), tfrm1.getCol1(), tfrm2.getCol1(), tfrm3.getCol1() );
-    mCol2 = Vector3( tfrm0.getCol2(), tfrm1.getCol2(), tfrm2.getCol2(), tfrm3.getCol2() );
-    mCol3 = Vector3( tfrm0.getCol3(), tfrm1.getCol3(), tfrm2.getCol3(), tfrm3.getCol3() );
-}
-
-inline void Transform3::get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const
-{
-    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol0( tmpV3_0 );
-    result1.setCol0( tmpV3_1 );
-    result2.setCol0( tmpV3_2 );
-    result3.setCol0( tmpV3_3 );
-    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol1( tmpV3_0 );
-    result1.setCol1( tmpV3_1 );
-    result2.setCol1( tmpV3_2 );
-    result3.setCol1( tmpV3_3 );
-    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol2( tmpV3_0 );
-    result1.setCol2( tmpV3_1 );
-    result2.setCol2( tmpV3_2 );
-    result3.setCol2( tmpV3_3 );
-    mCol3.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol3( tmpV3_0 );
-    result1.setCol3( tmpV3_1 );
-    result2.setCol3( tmpV3_2 );
-    result3.setCol3( tmpV3_3 );
-}
-
-inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, vec_float4 val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
-    vec_float4 detinv;
-    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );
-    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
-    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
-    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( tfrm.getCol2(), tmp2 ) );
-    inv0 = Vector3( vec_madd( tmp0.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    inv1 = Vector3( vec_madd( tmp0.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    inv2 = Vector3( vec_madd( tmp0.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    Vector3 inv0, inv1, inv2;
-    inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );
-    inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );
-    inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Point3 Transform3::operator *( const Point3 & pnt ) const
-{
-    return Point3(
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getX() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getY() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getZ() )
-    );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) ),
-        Vector3::yAxis( ),
-        Vector3( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3::zAxis( ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    return Transform3(
-        Vector3( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) ),
-        Vector3( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 Transform3::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Transform3 Transform3::rotation( const Quat & unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
-{
-    return Transform3(
-        Vector3( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ() ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( const Vector3 & translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    Aos::Transform3 mat0, mat1, mat2, mat3;
-    tfrm.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
-    vec_uint4 largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm.getCol0().getX();
-    yx = tfrm.getCol0().getY();
-    zx = tfrm.getCol0().getZ();
-    xy = tfrm.getCol1().getX();
-    yy = tfrm.getCol1().getY();
-    zy = tfrm.getCol1().getZ();
-    xz = tfrm.getCol2().getX();
-    yz = tfrm.getCol2().getY();
-    zz = tfrm.getCol2().getZ();
-
-    trace = vec_add( vec_add( xx, yy ), zz );
-
-    negTrace = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), trace );
-    ZgtX = (vec_uint4)vec_cmpgt( zz, xx );
-    ZgtY = (vec_uint4)vec_cmpgt( zz, yy );
-    YgtX = (vec_uint4)vec_cmpgt( yy, xx );
-    largestXorY = vec_andc( negTrace, vec_and( ZgtX, ZgtY ) );
-    largestYorZ = vec_and( negTrace, vec_or( YgtX, ZgtX ) );
-    largestZorX = vec_andc( negTrace, vec_andc( YgtX, ZgtY ) );
-    
-    zz = vec_sel( zz, negatef4(zz), largestXorY );
-    xy = vec_sel( xy, negatef4(xy), largestXorY );
-    xx = vec_sel( xx, negatef4(xx), largestYorZ );
-    yz = vec_sel( yz, negatef4(yz), largestYorZ );
-    yy = vec_sel( yy, negatef4(yy), largestZorX );
-    zx = vec_sel( zx, negatef4(zx), largestZorX );
-
-    radicand = vec_add( vec_add( vec_add( xx, yy ), zz ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    scale = vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( radicand ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-
-    tmpx = vec_madd( vec_sub( zy, yz ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpy = vec_madd( vec_sub( xz, zx ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpz = vec_madd( vec_sub( yx, xy ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpw = vec_madd( radicand, scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    qx = vec_sel( qx, tmpw, largestXorY );
-    qy = vec_sel( qy, tmpz, largestXorY );
-    qz = vec_sel( qz, tmpy, largestXorY );
-    qw = vec_sel( qw, tmpx, largestXorY );
-    tmpx = qx;
-    tmpz = qz;
-    qx = vec_sel( qx, qy, largestYorZ );
-    qy = vec_sel( qy, tmpx, largestYorZ );
-    qz = vec_sel( qz, qw, largestYorZ );
-    qw = vec_sel( qw, tmpz, largestYorZ );
-
-    mX = qx;
-    mY = qy;
-    mZ = qz;
-    mW = qw;
-}
-
-inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Vector3(
-        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol0().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol0().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol0().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol1().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol1().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol1().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol2().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol2().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol2().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Matrix3 crossMatrix( const Vector3 & vec )
-{
-    return Matrix3(
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec.getZ(), negatef4( vec.getY() ) ),
-        Vector3( negatef4( vec.getZ() ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec.getX() ),
-        Vector3( vec.getY(), negatef4( vec.getX() ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_CPP_H
+#define _VECTORMATH_MAT_SOA_CPP_H
+
+namespace Vectormath {
+namespace Soa {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f // pi/2 rounded to ten significant digits, written as a float literal
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat ) // copy constructor: duplicates the three columns of mat
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( vec_float4 scalar ) // builds all three columns from the same scalar
+{
+    mCol0 = Vector3( scalar ); // presumably fills x, y and z with scalar - confirm against the Vector3( vec_float4 ) constructor
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( const Quat & unitQuat ) // quaternion -> 3x3 matrix; the quaternion is used as given, no normalization happens here
+{
+    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat.getX();
+    qy = unitQuat.getY();
+    qz = unitQuat.getZ();
+    qw = unitQuat.getW();
+    qx2 = vec_add( qx, qx ); // 2*qx
+    qy2 = vec_add( qy, qy ); // 2*qy
+    qz2 = vec_add( qz, qz ); // 2*qz
+    qxqx2 = vec_madd( qx, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qx*qx; vec_madd with a zero addend is used as a plain multiply
+    qxqy2 = vec_madd( qx, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qx*qy
+    qxqz2 = vec_madd( qx, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qx*qz
+    qxqw2 = vec_madd( qw, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qx*qw
+    qyqy2 = vec_madd( qy, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qy*qy
+    qyqz2 = vec_madd( qy, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qy*qz
+    qyqw2 = vec_madd( qw, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qy*qw
+    qzqz2 = vec_madd( qz, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qz*qz
+    qzqw2 = vec_madd( qw, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qz*qw
+    mCol0 = Vector3( vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qyqy2 ), qzqz2 ), vec_add( qxqy2, qzqw2 ), vec_sub( qxqz2, qyqw2 ) ); // ( 1 - 2qy^2 - 2qz^2, 2qxqy + 2qzqw, 2qxqz - 2qyqw )
+    mCol1 = Vector3( vec_sub( qxqy2, qzqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qzqz2 ), vec_add( qyqz2, qxqw2 ) ); // ( 2qxqy - 2qzqw, 1 - 2qx^2 - 2qz^2, 2qyqz + 2qxqw )
+    mCol2 = Vector3( vec_add( qxqz2, qyqw2 ), vec_sub( qyqz2, qxqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qyqy2 ) ); // ( 2qxqz + 2qyqw, 2qyqz - 2qxqw, 1 - 2qx^2 - 2qy^2 )
+}
+
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 ) // builds the matrix from three explicit columns
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+}
+
+inline Matrix3::Matrix3( const Aos::Matrix3 & mat ) // builds the SoA matrix from a single AoS matrix, column by column
+{
+    mCol0 = Vector3( mat.getCol0() ); // presumably replicates the AoS column into every SoA slot - confirm against the Vector3( Aos::Vector3 ) constructor
+    mCol1 = Vector3( mat.getCol1() );
+    mCol2 = Vector3( mat.getCol2() );
+}
+
+inline Matrix3::Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 ) // packs four AoS matrices into one SoA matrix
+{
+    mCol0 = Vector3( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() ); // column 0 of all four inputs; presumably mat0..mat3 land in slots 0..3 - confirm in Vector3
+    mCol1 = Vector3( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
+    mCol2 = Vector3( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
+}
+
+inline void Matrix3::get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const // unpacks this SoA matrix into four AoS matrices, one column at a time
+{
+    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3; // scratch vectors, reused for each of the three columns
+    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 ); // split column 0 into four AoS vectors
+    result0.setCol0( tmpV3_0 );
+    result1.setCol0( tmpV3_1 );
+    result2.setCol0( tmpV3_2 );
+    result3.setCol0( tmpV3_3 );
+    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 ); // same for column 1
+    result0.setCol1( tmpV3_0 );
+    result1.setCol1( tmpV3_1 );
+    result2.setCol1( tmpV3_2 );
+    result3.setCol1( tmpV3_3 );
+    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 ); // same for column 2
+    result0.setCol2( tmpV3_0 );
+    result1.setCol2( tmpV3_1 );
+    result2.setCol2( tmpV3_2 );
+    result3.setCol2( tmpV3_3 );
+}
+
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 ) // replaces column 0
+{
+    mCol0 = _col0;
+    return *this; // returning *this allows calls to be chained
+}
+
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 ) // replaces column 1
+{
+    mCol1 = _col1;
+    return *this; // returning *this allows calls to be chained
+}
+
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 ) // replaces column 2
+{
+    mCol2 = _col2;
+    return *this; // returning *this allows calls to be chained
+}
+
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec ) // replaces the column selected by a runtime index
+{
+    *(&mCol0 + col) = vec; // pointer offset from mCol0: assumes mCol0..mCol2 are laid out back to back; col is not range-checked
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec ) // overwrites one row: element `row` of each column
+{
+    mCol0.setElem( row, vec.getElem( 0 ) ); // element 0 of vec goes to column 0
+    mCol1.setElem( row, vec.getElem( 1 ) ); // element 1 of vec goes to column 1
+    mCol2.setElem( row, vec.getElem( 2 ) ); // element 2 of vec goes to column 2
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, vec_float4 val ) // sets a single element via read-modify-write of its column
+{
+    Vector3 tmpV3_0;
+    tmpV3_0 = this->getCol( col ); // copy the column out
+    tmpV3_0.setElem( row, val ); // patch the requested element in the copy
+    this->setCol( col, tmpV3_0 ); // store the modified column back
+    return *this;
+}
+
+inline vec_float4 Matrix3::getElem( int col, int row ) const // returns element `row` of column `col`
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const // returns a copy of column 0
+{
+    return mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const // returns a copy of column 1
+{
+    return mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const // returns a copy of column 2
+{
+    return mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const // returns a copy of the column selected by a runtime index
+{
+    return *(&mCol0 + col); // pointer offset from mCol0: assumes the column members are contiguous; col is not range-checked
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const // gathers element `row` of each column into a new Vector3
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col ) // mutable access: returns a reference to the selected column
+{
+    return *(&mCol0 + col); // pointer offset from mCol0: assumes the column members are contiguous; col is not range-checked
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const // read-only access: returns the selected column by value
+{
+    return *(&mCol0 + col); // pointer offset from mCol0: assumes the column members are contiguous; col is not range-checked
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat ) // column-wise assignment
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this; // reference to the assigned-to matrix, as usual for operator =
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat ) // returns the transpose: row i of mat becomes column i of the result
+{
+    return Matrix3(
+        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ), // X components of all columns = row 0
+        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ), // Y components = row 1
+        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() ) // Z components = row 2
+    );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat ) // inverse from column cross products scaled by the reciprocal determinant
+{
+    Vector3 tmp0, tmp1, tmp2;
+    vec_float4 detinv;
+    tmp0 = cross( mat.getCol1(), mat.getCol2() ); // col1 x col2
+    tmp1 = cross( mat.getCol2(), mat.getCol0() ); // col2 x col0
+    tmp2 = cross( mat.getCol0(), mat.getCol1() ); // col0 x col1
+    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( mat.getCol2(), tmp2 ) ); // 1 / ( col2 . ( col0 x col1 ) ); a zero determinant is not guarded against
+    return Matrix3( // tmp0, tmp1, tmp2 end up as the rows of the result, each scaled by detinv
+        Vector3( vec_madd( tmp0.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        Vector3( vec_madd( tmp0.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        Vector3( vec_madd( tmp0.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline vec_float4 determinant( const Matrix3 & mat ) // determinant as the scalar triple product of the columns
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); // col2 . ( col0 x col1 )
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const // column-wise sum of two matrices
+{
+    return Matrix3(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 )
+    );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const // column-wise difference: *this minus mat
+{
+    return Matrix3(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) // in-place addition
+{
+    *this = *this + mat; // implemented through the binary operator + and assignment
+    return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) // in-place subtraction
+{
+    *this = *this - mat; // implemented through the binary operator - and assignment
+    return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const // unary minus: negates every column
+{
+    return Matrix3(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 )
+    );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat ) // applies the Vector3 absPerElem to each column of mat
+{
+    return Matrix3(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( vec_float4 scalar ) const // multiplies every column by scalar
+{
+    return Matrix3(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( vec_float4 scalar ) // in-place scalar multiply
+{
+    *this = *this * scalar; // implemented through the binary operator * and assignment
+    return *this;
+}
+
+inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat ) // scalar-on-the-left form of the scalar multiply
+{
+    return mat * scalar; // forwards to Matrix3::operator *( vec_float4 )
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const // matrix * column vector; vec_madd with a zero addend is used as a plain multiply
+{
+    return Vector3(
+        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // result X = row 0 . vec
+        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // result Y = row 1 . vec
+        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) // result Z = row 2 . vec
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const // matrix product *this * mat
+{
+    return Matrix3(
+        ( *this * mat.mCol0 ), // each result column is *this applied to the matching column of mat
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) // in-place matrix product; mat is the right-hand factor
+{
+    *this = *this * mat; // implemented through the binary operator * and assignment
+    return *this;
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) // element-wise product, not a matrix product
+{
+    return Matrix3(
+        mulPerElem( mat0.getCol0(), mat1.getCol0() ), // delegates to the Vector3 mulPerElem for each column pair
+        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
+        mulPerElem( mat0.getCol2(), mat1.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::identity( ) // matrix whose columns are the Vector3 x, y and z axes
+{
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationX( vec_float4 radians ) // rotation about the x axis by the given angle(s)
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c ); // s = sin( radians ), c = cos( radians )
+    return Matrix3(
+        Vector3::xAxis( ), // the x axis is left unchanged
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s ), // column 1 = ( 0, c, s )
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c ) // column 2 = ( 0, -s, c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationY( vec_float4 radians ) // rotation about the y axis by the given angle(s)
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c ); // s = sin( radians ), c = cos( radians )
+    return Matrix3(
+        Vector3( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) ), // column 0 = ( c, 0, -s )
+        Vector3::yAxis( ), // the y axis is left unchanged
+        Vector3( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c ) // column 2 = ( s, 0, c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZ( vec_float4 radians ) // rotation about the z axis by the given angle(s)
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c ); // s = sin( radians ), c = cos( radians )
+    return Matrix3(
+        Vector3( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), // column 0 = ( c, s, 0 )
+        Vector3( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), // column 1 = ( -s, c, 0 )
+        Vector3::zAxis( ) // the z axis is left unchanged
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ ) // combined rotation from X, Y, Z angles; the columns below expand Rz * Ry * Rx
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX ); // sine/cosine of the X angle
+    sincosf4( radiansXYZ.getY(), &sY, &cY ); // sine/cosine of the Y angle
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ ); // sine/cosine of the Z angle
+    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // cZ*sY, shared by columns 1 and 2
+    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // sZ*sY, shared by columns 1 and 2
+    return Matrix3(
+        Vector3( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) ), // ( cZ*cY, sZ*cY, -sY )
+        Vector3( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // ( cZ*sY*sX - sZ*cX, sZ*sY*sX + cZ*cX, cY*sX )
+        Vector3( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) // ( cZ*sY*cX + sZ*sX, sZ*sY*cX - cZ*sX, cY*cX )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( vec_float4 radians, const Vector3 & unitVec ) // axis-angle rotation; unitVec is used as given, no normalization happens here
+{
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c ); // s = sin( radians ), c = cos( radians )
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // x*y; vec_madd with a zero addend is used as a plain multiply
+    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // y*z
+    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // z*x
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c ); // 1 - cos
+    return Matrix3(
+        Vector3( vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ), // ( x*x*(1-c) + c, xy*(1-c) + z*s, zx*(1-c) - y*s )
+        Vector3( vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ), // ( xy*(1-c) - z*s, y*y*(1-c) + c, yz*(1-c) + x*s )
+        Vector3( vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ) ) // ( zx*(1-c) + y*s, yz*(1-c) - x*s, z*z*(1-c) + c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat ) // named factory that forwards to the Matrix3( const Quat & ) constructor
+{
+    return Matrix3( unitQuat );
+}
+
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec ) // diagonal matrix with scaleVec's X, Y, Z on the diagonal and zeros elsewhere
+{
+    return Matrix3(
+        Vector3( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ() )
+    );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ) // scales whole columns: column k is multiplied by component k of scaleVec
+{
+    return Matrix3(
+        ( mat.getCol0() * scaleVec.getX( ) ),
+        ( mat.getCol1() * scaleVec.getY( ) ),
+        ( mat.getCol2() * scaleVec.getZ( ) )
+    );
+}
+
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ) // multiplies every column element-wise by scaleVec, which scales the rows of mat
+{
+    return Matrix3(
+        mulPerElem( mat.getCol0(), scaleVec ),
+        mulPerElem( mat.getCol1(), scaleVec ),
+        mulPerElem( mat.getCol2(), scaleVec )
+    );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 ) // column-wise select between mat0 and mat1
+{
+    return Matrix3(
+        select( mat0.getCol0(), mat1.getCol0(), select1 ), // select1 is passed unchanged to the Vector3 select; presumably a mask that picks mat1 where set - confirm there
+        select( mat0.getCol1(), mat1.getCol1(), select1 ),
+        select( mat0.getCol2(), mat1.getCol2(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat ) // debug helper (compiled only under _VECTORMATH_DEBUG): prints the four AoS matrices held in mat
+{
+    Aos::Matrix3 mat0, mat1, mat2, mat3;
+    mat.get4Aos( mat0, mat1, mat2, mat3 ); // unpack into one AoS matrix per slot
+    printf("slot 0:\n");
+    print( mat0 ); // resolves to the print overload for Aos::Matrix3
+    printf("slot 1:\n");
+    print( mat1 );
+    printf("slot 2:\n");
+    print( mat2 );
+    printf("slot 3:\n");
+    print( mat3 );
+}
+
+inline void print( const Matrix3 & mat, const char * name ) // debug helper: prints "name:" on its own line, then the matrix
+{
+    printf("%s:\n", name);
+    print( mat ); // forwards to the single-argument print for the SoA Matrix3
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat ) // copy constructor: duplicates the four columns of mat
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+}
+
+inline Matrix4::Matrix4( vec_float4 scalar ) // builds all four columns from the same scalar
+{
+    mCol0 = Vector4( scalar ); // presumably fills x, y, z and w with scalar - confirm against the Vector4( vec_float4 ) constructor
+    mCol1 = Vector4( scalar );
+    mCol2 = Vector4( scalar );
+    mCol3 = Vector4( scalar );
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat ) // widens a Transform3 (four Vector3 columns) to 4x4 by adding a bottom row of ( 0, 0, 0, 1 )
+{
+    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // w = 0 for the three upper-left columns
+    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol3 = Vector4( mat.getCol3(), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); // w = 1 for the fourth column
+}
+
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 ) // builds the matrix from four explicit columns
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+    mCol3 = _col3;
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec ) // 4x4 from a 3x3 block plus a translation column; bottom row is ( 0, 0, 0, 1 )
+{
+    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // w = 0 for the 3x3 columns
+    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol3 = Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); // translation goes in column 3 with w = 1
+}
+
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec ) // 4x4 from a quaternion plus a translation column; bottom row is ( 0, 0, 0, 1 )
+{
+    Matrix3 mat;
+    mat = Matrix3( unitQuat ); // convert the quaternion to its 3x3 matrix first
+    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // w = 0 for the 3x3 columns
+    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol3 = Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); // translation goes in column 3 with w = 1
+}
+
+inline Matrix4::Matrix4( const Aos::Matrix4 & mat ) // builds the SoA matrix from a single AoS matrix, column by column
+{
+    mCol0 = Vector4( mat.getCol0() ); // presumably replicates the AoS column into every SoA slot - confirm against the Vector4( Aos::Vector4 ) constructor
+    mCol1 = Vector4( mat.getCol1() );
+    mCol2 = Vector4( mat.getCol2() );
+    mCol3 = Vector4( mat.getCol3() );
+}
+
+inline Matrix4::Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 ) // packs four AoS matrices into one SoA matrix
+{
+    mCol0 = Vector4( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() ); // column 0 of all four inputs; presumably mat0..mat3 land in slots 0..3 - confirm in Vector4
+    mCol1 = Vector4( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
+    mCol2 = Vector4( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
+    mCol3 = Vector4( mat0.getCol3(), mat1.getCol3(), mat2.getCol3(), mat3.getCol3() );
+}
+
+inline void Matrix4::get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const // unpacks this SoA matrix into four AoS matrices, one column at a time
+{
+    Aos::Vector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3; // scratch vectors, reused for each of the four columns
+    mCol0.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 ); // split column 0 into four AoS vectors
+    result0.setCol0( tmpV4_0 );
+    result1.setCol0( tmpV4_1 );
+    result2.setCol0( tmpV4_2 );
+    result3.setCol0( tmpV4_3 );
+    mCol1.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 ); // same for column 1
+    result0.setCol1( tmpV4_0 );
+    result1.setCol1( tmpV4_1 );
+    result2.setCol1( tmpV4_2 );
+    result3.setCol1( tmpV4_3 );
+    mCol2.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 ); // same for column 2
+    result0.setCol2( tmpV4_0 );
+    result1.setCol2( tmpV4_1 );
+    result2.setCol2( tmpV4_2 );
+    result3.setCol2( tmpV4_3 );
+    mCol3.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 ); // same for column 3
+    result0.setCol3( tmpV4_0 );
+    result1.setCol3( tmpV4_1 );
+    result2.setCol3( tmpV4_2 );
+    result3.setCol3( tmpV4_3 );
+}
+
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 ) // replaces column 0
+{
+    mCol0 = _col0;
+    return *this; // returning *this allows calls to be chained
+}
+
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 ) // replaces column 1
+{
+    mCol1 = _col1;
+    return *this; // returning *this allows calls to be chained
+}
+
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 ) // replaces column 2
+{
+    mCol2 = _col2;
+    return *this; // returning *this allows calls to be chained
+}
+
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 ) // replaces column 3
+{
+    mCol3 = _col3;
+    return *this; // returning *this allows calls to be chained
+}
+
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec ) // replaces the column selected by a runtime index
+{
+    *(&mCol0 + col) = vec; // pointer offset from mCol0: assumes mCol0..mCol3 are laid out back to back; col is not range-checked
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec ) // overwrites one row: element `row` of each column
+{
+    mCol0.setElem( row, vec.getElem( 0 ) ); // element 0 of vec goes to column 0
+    mCol1.setElem( row, vec.getElem( 1 ) ); // element 1 of vec goes to column 1
+    mCol2.setElem( row, vec.getElem( 2 ) ); // element 2 of vec goes to column 2
+    mCol3.setElem( row, vec.getElem( 3 ) ); // element 3 of vec goes to column 3
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, vec_float4 val ) // sets a single element via read-modify-write of its column
+{
+    Vector4 tmpV3_0; // despite the V3 in its name this temporary is a Vector4
+    tmpV3_0 = this->getCol( col ); // copy the column out
+    tmpV3_0.setElem( row, val ); // patch the requested element in the copy
+    this->setCol( col, tmpV3_0 ); // store the modified column back
+    return *this;
+}
+
+inline vec_float4 Matrix4::getElem( int col, int row ) const
+{
+    return getCol( col ).getElem( row );  // select the column first, then the row within it
+}
+
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return this->mCol0;  // copy of column 0
+}
+
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return this->mCol1;  // copy of column 1
+}
+
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return this->mCol2;  // copy of column 2
+}
+
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return this->mCol3;  // copy of column 3
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    return ( &mCol0 )[ col ];  // columns are addressed relative to mCol0; col is not range-checked
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4( this->mCol0.getElem( row ), this->mCol1.getElem( row ), this->mCol2.getElem( row ), this->mCol3.getElem( row ) );  // gather the same element from each column
+}
+
+inline Vector4 & Matrix4::operator []( int col )
+{
+    return ( &mCol0 )[ col ];  // mutable reference to the selected column; col is not range-checked
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    return ( &mCol0 )[ col ];  // returns the selected column by value; col is not range-checked
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    this->mCol0 = mat.getCol0();  // column-by-column copy
+    this->mCol1 = mat.getCol1();
+    this->mCol2 = mat.getCol2();
+    this->mCol3 = mat.getCol3();
+    return *this;
+}
+
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    const Vector4 c0( mat.getCol0() ), c1( mat.getCol1() ), c2( mat.getCol2() ), c3( mat.getCol3() );  // source columns
+    return Matrix4(
+        Vector4( c0.getX(), c1.getX(), c2.getX(), c3.getX() ),  // new column 0 = old row 0
+        Vector4( c0.getY(), c1.getY(), c2.getY(), c3.getY() ),  // new column 1 = old row 1
+        Vector4( c0.getZ(), c1.getZ(), c2.getZ(), c3.getZ() ),  // new column 2 = old row 2
+        Vector4( c0.getW(), c1.getW(), c2.getW(), c3.getW() ) );  // new column 3 = old row 3
+}
+
+inline const Matrix4 inverse( const Matrix4 & mat )  // General 4x4 inverse: builds four result columns from element products, then scales them by 1/determinant.
+{
+    Vector4 res0, res1, res2, res3;  // unscaled result columns
+    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat.getCol0().getX();  // mA..mD = column 0 (x,y,z,w)
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();  // mE..mH = column 1
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();  // mI..mL = column 2
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();  // mM..mP = column 3
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  // tmp0..tmp5: differences of two-element products, shared by the res0 terms below
+    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res0.setX( vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );  // res0 is complete after these four lines
+    res0.setY( vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+    res0.setZ( vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+    res0.setW( vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+    detInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_add( vec_add( vec_add( vec_madd( mA, res0.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, res0.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, res0.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, res0.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );  // 1 / (mA*res0.x + mE*res0.y + mI*res0.z + mM*res0.w); the divisor is not checked for zero here
+    res1.setX( vec_madd( mI, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  // res1..res3 hold partial terms here; they are completed after tmp0..tmp5 are recomputed
+    res1.setY( vec_madd( mM, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res1.setZ( vec_madd( mA, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res1.setW( vec_madd( mE, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res3.setX( vec_madd( mI, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res3.setY( vec_madd( mM, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res3.setZ( vec_madd( mA, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res3.setW( vec_madd( mE, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res2.setX( vec_madd( mI, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res2.setY( vec_madd( mM, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res2.setZ( vec_madd( mA, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res2.setW( vec_madd( mE, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp0 = vec_sub( vec_madd( mI, mB, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mJ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  // tmp0..tmp5 are overwritten with a second set of product differences
+    tmp1 = vec_sub( vec_madd( mM, mF, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mN, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp2 = vec_sub( vec_madd( mI, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp3 = vec_sub( vec_madd( mM, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp4 = vec_sub( vec_madd( mI, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp5 = vec_sub( vec_madd( mM, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    res2.setX( vec_add( vec_sub( vec_madd( mL, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getX() ) );  // fold the earlier partial terms (read back through the getters) into the final values
+    res2.setY( vec_add( vec_sub( vec_madd( mP, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getY() ) );
+    res2.setZ( vec_sub( vec_sub( vec_madd( mB, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mD, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getZ() ) );
+    res2.setW( vec_sub( vec_sub( vec_madd( mF, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mH, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getW() ) );
+    res3.setX( vec_add( vec_sub( vec_madd( mJ, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mK, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getX() ) );
+    res3.setY( vec_add( vec_sub( vec_madd( mN, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mO, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getY() ) );
+    res3.setZ( vec_sub( vec_sub( vec_madd( mC, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getZ() ) );
+    res3.setW( vec_sub( vec_sub( vec_madd( mG, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getW() ) );
+    res1.setX( vec_sub( vec_sub( vec_madd( mK, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getX() ) );
+    res1.setY( vec_sub( vec_sub( vec_madd( mO, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getY() ) );
+    res1.setZ( vec_add( vec_sub( vec_madd( mD, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getZ() ) );
+    res1.setW( vec_add( vec_sub( vec_madd( mH, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getW() ) );
+    return Matrix4(  // every result column is scaled by detInv
+        ( res0 * detInv ),
+        ( res1 * detInv ),
+        ( res2 * detInv ),
+        ( res3 * detInv )
+    );
+}
+
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    const Transform3 upper3x4(  // keep only the xyz part of each column
+        mat.getCol0().getXYZ( ),
+        mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ),
+        mat.getCol3().getXYZ( ) );
+    return Matrix4( inverse( upper3x4 ) );  // invert as a Transform3, then convert back to Matrix4
+}
+
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    const Transform3 upper3x4(  // keep only the xyz part of each column
+        mat.getCol0().getXYZ( ),
+        mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ),
+        mat.getCol3().getXYZ( ) );
+    return Matrix4( orthoInverse( upper3x4 ) );  // delegate to the Transform3 overload, then convert back to Matrix4
+}
+
+inline vec_float4 determinant( const Matrix4 & mat )  // Determinant of the 4x4 matrix, returned as a vec_float4.
+{
+    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat.getCol0().getX();  // mA..mD = column 0 (x,y,z,w)
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();  // mE..mH = column 1
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();  // mI..mL = column 2
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();  // mM..mP = column 3
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  // tmp0..tmp5: differences of two-element products shared by dx..dw
+    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    dx = vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  // dx..dw: the factors paired with mA, mE, mI, mM in the final sum
+    dy = vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    dz = vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    dw = vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return vec_add( vec_add( vec_add( vec_madd( mA, dx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, dy, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, dz, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, dw, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  // mA*dx + mE*dy + mI*dz + mM*dw
+}
+
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    // Column-wise sum of the two matrices.
+    const Vector4 sum0( mCol0 + mat.mCol0 );
+    const Vector4 sum1( mCol1 + mat.mCol1 );
+    const Vector4 sum2( mCol2 + mat.mCol2 );
+    const Vector4 sum3( mCol3 + mat.mCol3 );
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    // Column-wise difference: this matrix minus mat.
+    const Vector4 diff0( mCol0 - mat.mCol0 );
+    const Vector4 diff1( mCol1 - mat.mCol1 );
+    const Vector4 diff2( mCol2 - mat.mCol2 );
+    const Vector4 diff3( mCol3 - mat.mCol3 );
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    // Built on operator+ and the assignment operator, which hands back *this.
+    return ( *this = ( *this + mat ) );
+}
+
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    // Built on binary operator- and the assignment operator, which hands back *this.
+    return ( *this = ( *this - mat ) );
+}
+
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    // Negate each column.
+    const Vector4 neg0( -mCol0 );
+    const Vector4 neg1( -mCol1 );
+    const Vector4 neg2( -mCol2 );
+    const Vector4 neg3( -mCol3 );
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    // Apply the Vector4 absPerElem overload to each column.
+    const Vector4 abs0( absPerElem( mat.getCol0() ) );
+    const Vector4 abs1( absPerElem( mat.getCol1() ) );
+    const Vector4 abs2( absPerElem( mat.getCol2() ) );
+    const Vector4 abs3( absPerElem( mat.getCol3() ) );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+inline const Matrix4 Matrix4::operator *( vec_float4 scalar ) const
+{
+    // Multiply each column by the scalar.
+    const Vector4 scaled0( mCol0 * scalar );
+    const Vector4 scaled1( mCol1 * scalar );
+    const Vector4 scaled2( mCol2 * scalar );
+    const Vector4 scaled3( mCol3 * scalar );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( vec_float4 scalar )
+{
+    // Built on operator*(scalar) and the assignment operator, which hands back *this.
+    return ( *this = ( *this * scalar ) );
+}
+
+inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat )
+{
+    return mat.operator *( scalar );  // scalar-on-the-left form forwards to the member operator
+}
+
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const  // Matrix * 4-vector: each output element is row . vec.
+{
+    return Vector4(
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getX(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),  // x: row 0 . vec
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getY(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),  // y: row 1 . vec
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getZ(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),  // z: row 2 . vec
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getW(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )  // w: row 3 . vec
+    );
+}
+
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const  // Matrix * 3-vector: only columns 0..2 take part, so column 3 contributes nothing.
+{
+    return Vector4(
+        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),  // x
+        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),  // y
+        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),  // z
+        vec_add( vec_add( vec_madd( mCol0.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )  // w
+    );
+}
+
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const  // Matrix * point: like the Vector3 product, plus column 3 added unscaled.
+{
+    return Vector4(
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getX() ),  // x
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getY() ),  // y
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getZ() ),  // z
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getW(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getW() )  // w
+    );
+}
+
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    // Each product column is this matrix applied to the matching column of mat.
+    const Vector4 prod0( *this * mat.mCol0 );
+    const Vector4 prod1( *this * mat.mCol1 );
+    const Vector4 prod2( *this * mat.mCol2 );
+    const Vector4 prod3( *this * mat.mCol3 );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    // Right-multiplies by mat: the new value is (*this) * mat.
+    return ( *this = ( *this * mat ) );
+}
+
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    // Columns 0..2 go through the Vector3 product; column 3 is wrapped in Point3 and uses the point product.
+    const Vector4 prod0( *this * tfrm.getCol0() );
+    const Vector4 prod1( *this * tfrm.getCol1() );
+    const Vector4 prod2( *this * tfrm.getCol2() );
+    const Vector4 prod3( *this * Point3( tfrm.getCol3() ) );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    // Right-multiplies by tfrm: the new value is (*this) * tfrm.
+    return ( *this = ( *this * tfrm ) );
+}
+
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    // Apply the Vector4 mulPerElem overload to each pair of matching columns.
+    const Vector4 prod0( mulPerElem( mat0.getCol0(), mat1.getCol0() ) );
+    const Vector4 prod1( mulPerElem( mat0.getCol1(), mat1.getCol1() ) );
+    const Vector4 prod2( mulPerElem( mat0.getCol2(), mat1.getCol2() ) );
+    const Vector4 prod3( mulPerElem( mat0.getCol3(), mat1.getCol3() ) );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+inline const Matrix4 Matrix4::identity( )
+{
+    // Columns come from Vector4's xAxis/yAxis/zAxis/wAxis factories, in that order.
+    const Vector4 axisX( Vector4::xAxis( ) );
+    const Vector4 axisY( Vector4::yAxis( ) );
+    const Vector4 axisZ( Vector4::zAxis( ) );
+    const Vector4 axisW( Vector4::wAxis( ) );
+    return Matrix4( axisX, axisY, axisZ, axisW );
+}
+
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    this->mCol0.setXYZ( mat3.getCol0() );  // only setXYZ is called on columns 0..2
+    this->mCol1.setXYZ( mat3.getCol1() );
+    this->mCol2.setXYZ( mat3.getCol2() );  // column 3 is not touched
+    return *this;
+}
+
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    // Take the xyz part of columns 0..2.
+    const Vector3 upper0( mCol0.getXYZ( ) );
+    const Vector3 upper1( mCol1.getXYZ( ) );
+    const Vector3 upper2( mCol2.getXYZ( ) );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
+{
+    this->mCol3.setXYZ( translateVec );  // the translation is stored in the xyz part of column 3
+    return *this;
+}
+
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    return this->mCol3.getXYZ( );  // xyz part of column 3
+}
+
+inline const Matrix4 Matrix4::rotationX( vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );  // fills sinA and cosA from the angle
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( zero, cosA, sinA, zero ),
+        Vector4( zero, negatef4( sinA ), cosA, zero ),
+        Vector4::wAxis( )
+    );
+}
+
+inline const Matrix4 Matrix4::rotationY( vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );  // fills sinA and cosA from the angle
+    return Matrix4(
+        Vector4( cosA, zero, negatef4( sinA ), zero ),
+        Vector4::yAxis( ),
+        Vector4( sinA, zero, cosA, zero ),
+        Vector4::wAxis( )
+    );
+}
+
+inline const Matrix4 Matrix4::rotationZ( vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );  // fills sinA and cosA from the angle
+    return Matrix4(
+        Vector4( cosA, sinA, zero, zero ),
+        Vector4( negatef4( sinA ), cosA, zero, zero ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )  // Rotation built from three angles, read from the x, y and z elements of radiansXYZ.
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );  // sine/cosine of each of the three angles
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // cZ*sY, shared by columns 1 and 2
+    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // sZ*sY, shared by columns 1 and 2
+    return Matrix4(
+        Vector4( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( cZ*cY, sZ*cY, -sY, 0 )
+        Vector4( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( cZ*sY*sX - sZ*cX, sZ*sY*sX + cZ*cX, cY*sX, 0 )
+        Vector4( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( cZ*sY*cX + sZ*sX, sZ*sY*cX - cZ*sX, cY*cX, 0 )
+        Vector4::wAxis( )
+    );
+}
+
+inline const Matrix4 Matrix4::rotation( vec_float4 radians, const Vector3 & unitVec )  // Rotation by an angle about an axis; unitVec is not normalized here (presumably the caller must pass a unit vector).
+{
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c );  // s and c are filled from the angle
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // pairwise products of the axis components
+    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );  // 1 - c
+    return Matrix4(
+        Vector4( vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( x*x*(1-c) + c, xy*(1-c) + z*s, zx*(1-c) - y*s, 0 )
+        Vector4( vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( xy*(1-c) - z*s, y*y*(1-c) + c, yz*(1-c) + x*s, 0 )
+        Vector4( vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( zx*(1-c) + y*s, yz*(1-c) - x*s, z*z*(1-c) + c, 0 )
+        Vector4::wAxis( )
+    );
+}
+
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )  // Rotation matrix from a quaternion (unitQuat is presumably expected to be unit length).
+{
+    return Matrix4( Transform3::rotation( unitQuat ) );  // delegate to Transform3::rotation, then convert the result to Matrix4
+}
+
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});  // off-diagonal filler
+    return Matrix4(
+        Vector4( scaleVec.getX(), zero, zero, zero ),
+        Vector4( zero, scaleVec.getY(), zero, zero ),
+        Vector4( zero, zero, scaleVec.getZ(), zero ),
+        Vector4::wAxis( ) );
+}
+
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    // Columns 0..2 are multiplied by the x, y, z scale factors; column 3 is passed through unchanged.
+    const Vector4 scaled0( mat.getCol0() * scaleVec.getX( ) );
+    const Vector4 scaled1( mat.getCol1() * scaleVec.getY( ) );
+    const Vector4 scaled2( mat.getCol2() * scaleVec.getZ( ) );
+    const Vector4 kept3( mat.getCol3() );
+    return Matrix4( scaled0, scaled1, scaled2, kept3 );
+}
+
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    // Every column is multiplied element-wise by (scaleVec, 1), so the w elements are multiplied by one.
+    const Vector4 rowScale( scaleVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+    return Matrix4(
+        mulPerElem( mat.getCol0(), rowScale ),
+        mulPerElem( mat.getCol1(), rowScale ),
+        mulPerElem( mat.getCol2(), rowScale ),
+        mulPerElem( mat.getCol3(), rowScale )
+    );
+}
+
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    const Vector4 lastCol( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );  // (translateVec, 1) becomes column 3
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4::yAxis( ),
+        Vector4::zAxis( ),
+        lastCol );
+}
+
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    // Assemble the eye's frame (three axes plus the eye position) and return its orthoInverse.
+    // The Z axis points from the look-at position back toward the eye.
+    const Vector3 upDir( normalize( upVec ) );
+    const Vector3 backward( normalize( ( eyePos - lookAtPos ) ) );
+    const Vector3 side( normalize( cross( upDir, backward ) ) );
+    const Vector3 trueUp( cross( backward, side ) );  // recomputed from the other two axes
+    const Matrix4 eyeFrame = Matrix4( Vector4( side ), Vector4( trueUp ), Vector4( backward ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+inline const Matrix4 Matrix4::perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )  // Perspective projection from a vertical field of view, aspect ratio and near/far distances.
+{
+    vec_float4 f, rangeInv;
+    f = tanf4( vec_sub( ((vec_float4){_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2}), vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), fovyRadians, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );  // f = tan( _VECTORMATH_PI_OVER_2 - 0.5 * fovyRadians )
+    rangeInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );  // 1 / (zNear - zFar); no guard for zNear == zFar
+    return Matrix4(
+        Vector4( divf4( f, aspect ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( f / aspect, 0, 0, 0 )
+        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), f, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( 0, f, 0, 0 )
+        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_add( zNear, zFar ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) ),  // ( 0, 0, (zNear + zFar) * rangeInv, -1 )
+        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( vec_madd( zNear, zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )  // ( 0, 0, zNear * zFar * rangeInv * 2, 0 )
+    );
+}
+
+inline const Matrix4 Matrix4::frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )  // Perspective projection from explicit left/right/bottom/top extents and near/far distances.
+{
+    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
+    sum_rl = vec_add( right, left );
+    sum_tb = vec_add( top, bottom );
+    sum_nf = vec_add( zNear, zFar );
+    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );  // 1 / (right - left); the three divisors are not checked for zero
+    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );  // 1 / (top - bottom)
+    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );  // 1 / (zNear - zFar)
+    n2 = vec_add( zNear, zNear );  // 2 * zNear
+    return Matrix4(
+        Vector4( vec_madd( n2, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( n2 * inv_rl, 0, 0, 0 )
+        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( n2, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( 0, n2 * inv_tb, 0, 0 )
+        Vector4( vec_madd( sum_rl, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_tb, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) ),  // ( sum_rl * inv_rl, sum_tb * inv_tb, sum_nf * inv_nf, -1 )
+        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( n2, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )  // ( 0, 0, n2 * inv_nf * zFar, 0 )
+    );
+}
+
+inline const Matrix4 Matrix4::orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )  // Orthographic projection from left/right/bottom/top extents and near/far distances.
+{
+    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
+    sum_rl = vec_add( right, left );
+    sum_tb = vec_add( top, bottom );
+    sum_nf = vec_add( zNear, zFar );
+    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );  // 1 / (right - left); the three divisors are not checked for zero
+    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );  // 1 / (top - bottom)
+    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );  // 1 / (zNear - zFar)
+    return Matrix4(
+        Vector4( vec_add( inv_rl, inv_rl ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( 2 * inv_rl, 0, 0, 0 )
+        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_tb, inv_tb ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( 0, 2 * inv_tb, 0, 0 )
+        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_nf, inv_nf ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // ( 0, 0, 2 * inv_nf, 0 )
+        Vector4( vec_madd( negatef4( sum_rl ), inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( negatef4( sum_tb ), inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) )  // ( -sum_rl * inv_rl, -sum_tb * inv_tb, sum_nf * inv_nf, 1 )
+    );
+}
+
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 )
+{
+    // Apply the Vector4 select overload to each pair of columns, using the same mask for all four.
+    const Vector4 picked0( select( mat0.getCol0(), mat1.getCol0(), select1 ) );
+    const Vector4 picked1( select( mat0.getCol1(), mat1.getCol1(), select1 ) );
+    const Vector4 picked2( select( mat0.getCol2(), mat1.getCol2(), select1 ) );
+    const Vector4 picked3( select( mat0.getCol3(), mat1.getCol3(), select1 ) );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix4 & mat )
+{
+    Aos::Matrix4 slot0, slot1, slot2, slot3;  // one AoS matrix per slot of the SoA matrix
+    mat.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf("%s:\n", name);  // label line first
+    print( mat );           // then the four slots via the unnamed overload
+}
+
+#endif
+
+inline Transform3::Transform3( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;  // column-by-column copy
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+}
+
+inline Transform3::Transform3( vec_float4 scalar )
+{
+    this->mCol0 = Vector3( scalar );  // every column is built from the same scalar
+    this->mCol1 = Vector3( scalar );
+    this->mCol2 = Vector3( scalar );
+    this->mCol3 = Vector3( scalar );
+}
+
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
+{
+    this->mCol0 = _col0;  // store the four columns as given
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+    this->mCol3 = _col3;
+}
+
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    setUpper3x3( tfrm );             // the 3x3 part comes from tfrm
+    setTranslation( translateVec );  // the translation part comes from translateVec
+}
+
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    setUpper3x3( Matrix3( unitQuat ) );  // the quaternion is first converted to a Matrix3
+    setTranslation( translateVec );      // the translation part comes from translateVec
+}
+
+inline Transform3::Transform3( const Aos::Transform3 & tfrm )
+{
+    this->mCol0 = Vector3( tfrm.getCol0() );  // each SoA column is built from a single AoS column
+    this->mCol1 = Vector3( tfrm.getCol1() );
+    this->mCol2 = Vector3( tfrm.getCol2() );
+    this->mCol3 = Vector3( tfrm.getCol3() );
+}
+
+inline Transform3::Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 )
+{
+    this->mCol0 = Vector3( tfrm0.getCol0(), tfrm1.getCol0(), tfrm2.getCol0(), tfrm3.getCol0() );  // each SoA column combines the matching column of the four inputs
+    this->mCol1 = Vector3( tfrm0.getCol1(), tfrm1.getCol1(), tfrm2.getCol1(), tfrm3.getCol1() );
+    this->mCol2 = Vector3( tfrm0.getCol2(), tfrm1.getCol2(), tfrm2.getCol2(), tfrm3.getCol2() );
+    this->mCol3 = Vector3( tfrm0.getCol3(), tfrm1.getCol3(), tfrm2.getCol3(), tfrm3.getCol3() );
+}
+
+inline void Transform3::get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const
+{
+    Aos::Vector3 slot0, slot1, slot2, slot3;  // scratch: one AoS vector per output transform
+    mCol0.get4Aos( slot0, slot1, slot2, slot3 );  // column 0 -> four AoS vectors
+    result0.setCol0( slot0 );
+    result1.setCol0( slot1 );
+    result2.setCol0( slot2 );
+    result3.setCol0( slot3 );
+    mCol1.get4Aos( slot0, slot1, slot2, slot3 );  // column 1
+    result0.setCol1( slot0 );
+    result1.setCol1( slot1 );
+    result2.setCol1( slot2 );
+    result3.setCol1( slot3 );
+    mCol2.get4Aos( slot0, slot1, slot2, slot3 );  // column 2
+    result0.setCol2( slot0 );
+    result1.setCol2( slot1 );
+    result2.setCol2( slot2 );
+    result3.setCol2( slot3 );
+    mCol3.get4Aos( slot0, slot1, slot2, slot3 );  // column 3
+    result0.setCol3( slot0 );
+    result1.setCol3( slot1 );
+    result2.setCol3( slot2 );
+    result3.setCol3( slot3 );
+}
+
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
+{   // Replaces column 0; returns *this so calls can be chained.
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
+{   // Replaces column 1; returns *this so calls can be chained.
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
+{   // Replaces column 2; returns *this so calls can be chained.
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
+{   // Replaces column 3; returns *this so calls can be chained.
+    mCol3 = _col3;
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{   // Replaces the column selected by index col; returns *this.
+    *(&mCol0 + col) = vec;  // indexes from mCol0 by pointer offset; col is not range-checked here
+    return *this;
+}
+
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
+{   // Writes element N of vec into element `row` of column N, for N = 0..3; returns *this.
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Transform3 & Transform3::setElem( int col, int row, vec_float4 val )
+{
+    Vector3 column;               // working copy of the addressed column
+    column = getCol( col );
+    column.setElem( row, val );   // overwrite one element of the copy
+    setCol( col, column );        // store the modified column back
+    return *this;
+}
+
+inline vec_float4 Transform3::getElem( int col, int row ) const
+{   // Returns element `row` of column `col`.
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Transform3::getCol0( ) const
+{   // Returns a copy of column 0.
+    return mCol0;
+}
+
+inline const Vector3 Transform3::getCol1( ) const
+{   // Returns a copy of column 1.
+    return mCol1;
+}
+
+inline const Vector3 Transform3::getCol2( ) const
+{   // Returns a copy of column 2.
+    return mCol2;
+}
+
+inline const Vector3 Transform3::getCol3( ) const
+{   // Returns a copy of column 3.
+    return mCol3;
+}
+
+inline const Vector3 Transform3::getCol( int col ) const
+{   // Returns a copy of the column selected by index col.
+    return *(&mCol0 + col);  // indexes from mCol0 by pointer offset; col is not range-checked here
+}
+
+inline const Vector4 Transform3::getRow( int row ) const
+{   // Builds a Vector4 from element `row` of columns 0, 1, 2 and 3, in that order.
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+inline Vector3 & Transform3::operator []( int col )
+{   // Mutable access: returns a reference to the column selected by col.
+    return *(&mCol0 + col);  // indexes from mCol0 by pointer offset; col is not range-checked here
+}
+
+inline const Vector3 Transform3::operator []( int col ) const
+{   // Const access: returns a copy of the column selected by col.
+    return *(&mCol0 + col);  // same unchecked pointer-offset indexing as the non-const overload
+}
+
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{   // Assigns all four columns from tfrm and returns *this.
+    mCol0 = tfrm.mCol0;
+    mCol1 = tfrm.mCol1;
+    mCol2 = tfrm.mCol2;
+    mCol3 = tfrm.mCol3;
+    return *this;
+}
+
+inline const Transform3 inverse( const Transform3 & tfrm )
+{   // General inverse: inverts the upper 3x3 via cross products and a determinant, then derives the inverse translation.
+    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
+    vec_float4 detinv;
+    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );  // tmp0..tmp2: cross products of the column pairs
+    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
+    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
+    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( tfrm.getCol2(), tmp2 ) );  // 1 / (col2 . (col0 x col1)); no guard against a zero determinant
+    inv0 = Vector3( vec_madd( tmp0.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    inv1 = Vector3( vec_madd( tmp0.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    inv2 = Vector3( vec_madd( tmp0.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return Transform3(
+        inv0,  // inverse column N = (tmp0, tmp1, tmp2) element N, each scaled by detinv
+        inv1,
+        inv2,
+        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
+    );  // last column: negated product of the inverted 3x3 with the original translation
+}
+
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{   // Inverse built from the transpose of the upper 3x3; presumably only valid when that 3x3 is orthonormal (per the name) -- confirm with callers.
+    Vector3 inv0, inv1, inv2;
+    inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );  // row x of the upper 3x3
+    inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );  // row y
+    inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );  // row z
+    return Transform3(
+        inv0,
+        inv1,
+        inv2,
+        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
+    );  // last column: negated product of the transposed 3x3 with the original translation
+}
+
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{   // Applies the Vector3 absPerElem to each of the four columns.
+    return Transform3(
+        absPerElem( tfrm.getCol0() ),
+        absPerElem( tfrm.getCol1() ),
+        absPerElem( tfrm.getCol2() ),
+        absPerElem( tfrm.getCol3() )
+    );
+}
+
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
+{   // Transforms a vector by the upper 3x3 only; the translation column mCol3 is not used.
+    return Vector3(
+        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );  // result element r = col0.r*vec.x + col1.r*vec.y + col2.r*vec.z
+}
+
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const
+{   // Transforms a point: upper 3x3 times pnt, plus the translation column mCol3.
+    return Point3(
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getX() ),
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getY() ),
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getZ() )
+    );  // result element r = col0.r*pnt.x + col1.r*pnt.y + col2.r*pnt.z + col3.r
+}
+
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{   // Composes two transforms: *this is applied to each column of tfrm.
+    return Transform3(
+        ( *this * tfrm.mCol0 ),  // columns 0..2 go through the Vector3 overload (no translation added)
+        ( *this * tfrm.mCol1 ),
+        ( *this * tfrm.mCol2 ),
+        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )  // translation goes through the Point3 overload, so mCol3 is added
+    );
+}
+
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{   // In-place composition: *this becomes (*this * tfrm); returns *this.
+    *this = *this * tfrm;
+    return *this;
+}
+
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{   // Applies the Vector3 mulPerElem to matching columns of tfrm0 and tfrm1 (not a matrix product).
+    return Transform3(
+        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
+        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
+        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
+        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
+    );
+}
+
+inline const Transform3 Transform3::identity( )
+{   // Returns the transform whose columns are the x, y and z axes with a zero translation.
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )  // zero translation
+    );
+}
+
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{   // Overwrites columns 0..2 with the columns of tfrm; mCol3 is left untouched. Returns *this.
+    mCol0 = tfrm.getCol0();
+    mCol1 = tfrm.getCol1();
+    mCol2 = tfrm.getCol2();
+    return *this;
+}
+
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{   // Returns a Matrix3 built from columns 0..2.
+    return Matrix3( mCol0, mCol1, mCol2 );
+}
+
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
+{   // Stores translateVec in the fourth column (mCol3); returns *this.
+    mCol3 = translateVec;
+    return *this;
+}
+
+inline const Vector3 Transform3::getTranslation( ) const
+{   // Returns a copy of the fourth column (mCol3).
+    return mCol3;
+}
+
+inline const Transform3 Transform3::rotationX( vec_float4 radians )
+{   // Builds a rotation about the x axis with zero translation.
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );  // fills s and c from radians
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s ),               // column 1 = ( 0,  c, s )
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c ),   // column 2 = ( 0, -s, c )
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Transform3 Transform3::rotationY( vec_float4 radians )
+{   // Builds a rotation about the y axis with zero translation.
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );  // fills s and c from radians
+    return Transform3(
+        Vector3( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) ),   // column 0 = ( c, 0, -s )
+        Vector3::yAxis( ),
+        Vector3( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c ),               // column 2 = ( s, 0,  c )
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Transform3 Transform3::rotationZ( vec_float4 radians )
+{   // Builds a rotation about the z axis with zero translation.
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );  // fills s and c from radians
+    return Transform3(
+        Vector3( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),               // column 0 = (  c, s, 0 )
+        Vector3( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),   // column 1 = ( -s, c, 0 )
+        Vector3::zAxis( ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
+{   // Builds the combined rotation whose columns equal those of rotationZ * rotationY * rotationX, with zero translation.
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );  // sX/cX, sY/cY, sZ/cZ come from the x, y and z angles
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // cZ*sY, shared by columns 1 and 2
+    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // sZ*sY, shared by columns 1 and 2
+    return Transform3(
+        Vector3( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) ),
+        Vector3( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        Vector3( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Transform3 Transform3::rotation( vec_float4 radians, const Vector3 & unitVec )
+{   // Wraps Matrix3::rotation( radians, unitVec ) in a transform with zero translation.
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Transform3 Transform3::rotation( const Quat & unitQuat )
+{   // Wraps the Matrix3 built from unitQuat in a transform with zero translation.
+    return Transform3( Matrix3( unitQuat ), Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{   // Builds a transform with scaleVec's x, y, z on the diagonal, zeros elsewhere, and zero translation.
+    return Transform3(
+        Vector3( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ() ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{   // Scales column 0 by scaleVec.x, column 1 by .y and column 2 by .z; the translation column is unchanged.
+    return Transform3(
+        ( tfrm.getCol0() * scaleVec.getX( ) ),
+        ( tfrm.getCol1() * scaleVec.getY( ) ),
+        ( tfrm.getCol2() * scaleVec.getZ( ) ),
+        tfrm.getCol3()
+    );
+}
+
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{   // Multiplies every column, including the translation, element-wise by scaleVec.
+    return Transform3(
+        mulPerElem( tfrm.getCol0(), scaleVec ),
+        mulPerElem( tfrm.getCol1(), scaleVec ),
+        mulPerElem( tfrm.getCol2(), scaleVec ),
+        mulPerElem( tfrm.getCol3(), scaleVec )
+    );
+}
+
+inline const Transform3 Transform3::translation( const Vector3 & translateVec )
+{   // Builds a transform with the x, y and z axes as columns 0..2 and translateVec as the fourth column.
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( ),
+        translateVec
+    );
+}
+
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 )
+{   // Applies the Vector3 select to each column pair with the same mask; presumably picks tfrm1 where select1 is set -- confirm against the Vector3 overload.
+    return Transform3(
+        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
+        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
+        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
+        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG  // the print helpers below exist only in debug builds
+
+inline void print( const Transform3 & tfrm )
+{   // Prints the SoA transform as four AoS transforms, each preceded by a "slot N:" line.
+    Aos::Transform3 mat0, mat1, mat2, mat3;
+    tfrm.get4Aos( mat0, mat1, mat2, mat3 );  // unpack into one AoS transform per slot
+    printf("slot 0:\n");
+    print( mat0 );
+    printf("slot 1:\n");
+    print( mat1 );
+    printf("slot 2:\n");
+    print( mat2 );
+    printf("slot 3:\n");
+    print( mat3 );
+}
+
+inline void print( const Transform3 & tfrm, const char * name )
+{   // Prints "name:" on its own line, then the transform via print( tfrm ).
+    printf("%s:\n", name);
+    print( tfrm );
+}
+
+#endif  // _VECTORMATH_DEBUG
+
+inline Quat::Quat( const Matrix3 & tfrm )
+{   // Builds a quaternion from a 3x3 matrix without branching: masks choose the formula variant per element.
+    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
+    vec_uint4 largestXorY, largestYorZ, largestZorX;
+
+    xx = tfrm.getCol0().getX();  // naming: first letter = element within the column, second letter = column
+    yx = tfrm.getCol0().getY();
+    zx = tfrm.getCol0().getZ();
+    xy = tfrm.getCol1().getX();
+    yy = tfrm.getCol1().getY();
+    zy = tfrm.getCol1().getZ();
+    xz = tfrm.getCol2().getX();
+    yz = tfrm.getCol2().getY();
+    zz = tfrm.getCol2().getZ();
+
+    trace = vec_add( vec_add( xx, yy ), zz );  // sum of the diagonal
+
+    negTrace = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), trace );  // mask: 0 > trace
+    ZgtX = (vec_uint4)vec_cmpgt( zz, xx );
+    ZgtY = (vec_uint4)vec_cmpgt( zz, yy );
+    YgtX = (vec_uint4)vec_cmpgt( yy, xx );
+    largestXorY = vec_andc( negTrace, vec_and( ZgtX, ZgtY ) );    // negTrace and not (zz > xx and zz > yy)
+    largestYorZ = vec_and( negTrace, vec_or( YgtX, ZgtX ) );      // negTrace and (yy > xx or zz > xx)
+    largestZorX = vec_andc( negTrace, vec_andc( YgtX, ZgtY ) );   // negTrace and not (yy > xx and not zz > yy)
+    
+    zz = vec_sel( zz, negatef4(zz), largestXorY );  // flip signs of the terms selected by each mask
+    xy = vec_sel( xy, negatef4(xy), largestXorY );
+    xx = vec_sel( xx, negatef4(xx), largestYorZ );
+    yz = vec_sel( yz, negatef4(yz), largestYorZ );
+    yy = vec_sel( yy, negatef4(yy), largestZorX );
+    zx = vec_sel( zx, negatef4(zx), largestZorX );
+
+    radicand = vec_add( vec_add( vec_add( xx, yy ), zz ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );  // xx + yy + zz + 1, after the sign flips
+    scale = vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( radicand ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // 0.5 / sqrt( radicand )
+
+    tmpx = vec_madd( vec_sub( zy, yz ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    tmpy = vec_madd( vec_sub( xz, zx ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    tmpz = vec_madd( vec_sub( yx, xy ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    tmpw = vec_madd( radicand, scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // radicand * scale
+    qx = tmpx;
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    qx = vec_sel( qx, tmpw, largestXorY );  // where largestXorY: (qx,qy,qz,qw) <- (tmpw,tmpz,tmpy,tmpx)
+    qy = vec_sel( qy, tmpz, largestXorY );
+    qz = vec_sel( qz, tmpy, largestXorY );
+    qw = vec_sel( qw, tmpx, largestXorY );
+    tmpx = qx;  // keep the pre-swap qx and qz for the exchange below
+    tmpz = qz;
+    qx = vec_sel( qx, qy, largestYorZ );    // where largestYorZ: swap qx <-> qy and qz <-> qw
+    qy = vec_sel( qy, tmpx, largestYorZ );
+    qz = vec_sel( qz, qw, largestYorZ );
+    qw = vec_sel( qw, tmpz, largestYorZ );
+
+    mX = qx;
+    mY = qy;
+    mZ = qz;
+    mW = qw;
+}
+
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{   // Outer product: column N of the result is tfrm0 scaled by element N of tfrm1.
+    return Matrix3(
+        ( tfrm0 * tfrm1.getX( ) ),
+        ( tfrm0 * tfrm1.getY( ) ),
+        ( tfrm0 * tfrm1.getZ( ) )
+    );
+}
+
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{   // Outer product: column N of the result is tfrm0 scaled by element N of tfrm1.
+    return Matrix4(
+        ( tfrm0 * tfrm1.getX( ) ),
+        ( tfrm0 * tfrm1.getY( ) ),
+        ( tfrm0 * tfrm1.getZ( ) ),
+        ( tfrm0 * tfrm1.getW( ) )
+    );
+}
+
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
+{   // Row-vector times matrix: result element N is the dot product of vec with column N of mat.
+    return Vector3(
+        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol0().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol0().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol0().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol1().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol1().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol1().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol2().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol2().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol2().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{   // Builds the skew-symmetric matrix M for which M * v equals cross( vec, v ).
+    return Matrix3(
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec.getZ(), negatef4( vec.getY() ) ),   // column 0 = (  0,  z, -y )
+        Vector3( negatef4( vec.getZ() ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec.getX() ),   // column 1 = ( -z,  0,  x )
+        Vector3( vec.getY(), negatef4( vec.getX() ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )    // column 2 = (  y, -x,  0 )
+    );
+}
+
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
+{   // Returns the matrix whose column N is cross( vec, column N of mat ).
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
+}
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_aos.h
index 4ab6ffe08..196a48d98 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_aos.h
@@ -1,536 +1,536 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_CPP_H
-#define _VECTORMATH_QUAT_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Quat::Quat( float _x, float _y, float _z, float _w )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
-        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
-        mVec128 = (vec_float4){_x, _y, _z, _w};
-    } else {
-        float *pf = (float *)&mVec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-        pf[3] = _w;
-    }
-}
-
-inline Quat::Quat( floatInVec _x, floatInVec _y, floatInVec _z, floatInVec _w )
-{
-    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
-    vec_float4 yw = vec_mergeh( _y.get128(), _w.get128() );
-    mVec128 = vec_mergeh( xz, yw );
-}
-
-inline Quat::Quat( Vector3 xyz, float _w )
-{
-    mVec128 = xyz.get128();
-    _vmathVfSetElement(mVec128, _w, 3);
-}
-
-inline Quat::Quat( Vector3 xyz, floatInVec _w )
-{
-    mVec128 = xyz.get128();
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-}
-
-inline Quat::Quat( Vector4 vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Quat::Quat( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Quat::Quat( floatInVec scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Quat::Quat( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Quat lerp( float t, Quat quat0, Quat quat1 )
-{
-    return lerp( floatInVec(t), quat0, quat1 );
-}
-
-inline const Quat lerp( floatInVec t, Quat quat0, Quat quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 )
-{
-    return slerp( floatInVec(t), unitQuat0, unitQuat1 );
-}
-
-inline const Quat slerp( floatInVec t, Quat unitQuat0, Quat unitQuat1 )
-{
-    Quat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Quat( vec_madd( start.get128(), scale0, vec_madd( unitQuat1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-}
-
-inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
-{
-    return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );
-}
-
-inline const Quat squad( floatInVec t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), tmp0, tmp1 );
-}
-
-inline vec_float4 Quat::get128( ) const
-{
-    return mVec128;
-}
-
-inline Quat & Quat::operator =( Quat quat )
-{
-    mVec128 = quat.mVec128;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( Vector3 vec )
-{
-    mVec128 = vec_sel( vec.get128(), mVec128, _VECTORMATH_MASK_0x000F );
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Quat & Quat::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Quat & Quat::setX( floatInVec _x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Quat::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Quat & Quat::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Quat & Quat::setY( floatInVec _y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Quat::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Quat & Quat::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Quat & Quat::setZ( floatInVec _z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Quat::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Quat & Quat::setW( float _w )
-{
-    _vmathVfSetElement(mVec128, _w, 3);
-    return *this;
-}
-
-inline Quat & Quat::setW( floatInVec _w )
-{
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-    return *this;
-}
-
-inline const floatInVec Quat::getW( ) const
-{
-    return floatInVec( mVec128, 3 );
-}
-
-inline Quat & Quat::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Quat & Quat::setElem( int idx, floatInVec value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Quat::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Quat::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Quat::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Quat Quat::operator +( Quat quat ) const
-{
-    return Quat( vec_add( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator -( Quat quat ) const
-{
-    return Quat( vec_sub( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Quat Quat::operator *( floatInVec scalar ) const
-{
-    return Quat( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline Quat & Quat::operator +=( Quat quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( Quat quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Quat Quat::operator /( floatInVec scalar ) const
-{
-    return Quat( divf4( mVec128, scalar.get128() ) );
-}
-
-inline Quat & Quat::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Quat & Quat::operator /=( floatInVec scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat( negatef4( mVec128 ) );
-}
-
-inline const Quat operator *( float scalar, Quat quat )
-{
-    return floatInVec(scalar) * quat;
-}
-
-inline const Quat operator *( floatInVec scalar, Quat quat )
-{
-    return quat * scalar;
-}
-
-inline const floatInVec dot( Quat quat0, Quat quat1 )
-{
-    return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
-}
-
-inline const floatInVec norm( Quat quat )
-{
-    return floatInVec(  _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
-}
-
-inline const floatInVec length( Quat quat )
-{
-    return floatInVec(  sqrtf4(_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
-}
-
-inline const Quat normalize( Quat quat )
-{
-    vec_float4 dot = _vmathVfDot4( quat.get128(), quat.get128() );
-    return Quat( vec_madd( quat.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Quat Quat::rotation( Vector3 unitVec0, Vector3 unitVec1 )
-{
-    Vector3 crossVec;
-    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = vec_splat( cosAngle, 0 );
-    cosAngleX2Plus2 = vec_madd( cosAngle, ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){2.0f,2.0f,2.0f,2.0f}) );
-    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
-    cosHalfAngleX2 = vec_madd( recipCosHalfAngleX2, cosAngleX2Plus2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    crossVec = cross( unitVec0, unitVec1 );
-    res = vec_madd( crossVec.get128(), recipCosHalfAngleX2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_sel( res, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotation( float radians, Vector3 unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Quat Quat::rotation( floatInVec radians, Vector3 unitVec )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( vec_madd( unitVec.get128(), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c, _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationX( floatInVec radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0xF000 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationY( floatInVec radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x0F00 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationZ( floatInVec radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x00F0 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::operator *( Quat quat ) const
-{
-    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = mVec128;
-    rdata = quat.mVec128;
-    tmp0 = vec_perm( ldata, ldata, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( rdata, rdata, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( ldata, ldata, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( rdata, rdata, _VECTORMATH_PERM_YZXW );
-    qv = vec_madd( vec_splat( ldata, 3 ), rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_madd( ldata, rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    l_wxyz = vec_sld( ldata, ldata, 12 );
-    r_wxyz = vec_sld( rdata, rdata, 12 );
-    qw = vec_nmsub( l_wxyz, r_wxyz, product );
-    xy = vec_madd( l_wxyz, r_wxyz, product );
-    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );
-    return Quat( vec_sel( qv, qw, _VECTORMATH_MASK_0x000F ) );
-}
-
-inline Quat & Quat::operator *=( Quat quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( Quat quat, Vector3 vec )
-{
-    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat.get128();
-    vdata = vec.get128();
-    tmp0 = vec_perm( qdata, qdata, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( vdata, vdata, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( qdata, qdata, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( vdata, vdata, _VECTORMATH_PERM_YZXW );
-    wwww = vec_splat( qdata, 3 );
-    qv = vec_madd( wwww, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_madd( qdata, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
-    qw = vec_add( vec_sld( product, product, 8 ), qw );
-    tmp1 = vec_perm( qv, qv, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( qv, qv, _VECTORMATH_PERM_YZXW );
-    res = vec_madd( vec_splat( qw, 0 ), qdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( wwww, qv, res );
-    res = vec_madd( tmp0, tmp1, res );
-    res = vec_nmsub( tmp2, tmp3, res );
-    return Vector3( res );
-}
-
-inline const Quat conj( Quat quat )
-{
-    return Quat( vec_xor( quat.get128(), ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ) );
-}
-
-inline const Quat select( Quat quat0, Quat quat1, bool select1 )
-{
-    return select( quat0, quat1, boolInVec(select1) );
-}
-
-inline const Quat select( Quat quat0, Quat quat1, boolInVec select1 )
-{
-    return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Quat quat )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( Quat quat, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Quat::Quat( float _x, float _y, float _z, float _w )
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
+        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
+        mVec128 = (vec_float4){_x, _y, _z, _w};
+    } else {
+        float *pf = (float *)&mVec128;
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+        pf[3] = _w;
+    }
+}
+
+inline Quat::Quat( floatInVec _x, floatInVec _y, floatInVec _z, floatInVec _w )
+{
+    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
+    vec_float4 yw = vec_mergeh( _y.get128(), _w.get128() );
+    mVec128 = vec_mergeh( xz, yw );
+}
+
+inline Quat::Quat( Vector3 xyz, float _w )
+{
+    mVec128 = xyz.get128();
+    _vmathVfSetElement(mVec128, _w, 3);
+}
+
+inline Quat::Quat( Vector3 xyz, floatInVec _w )
+{
+    mVec128 = xyz.get128();
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+}
+
+inline Quat::Quat( Vector4 vec )
+{
+    mVec128 = vec.get128();
+}
+
+inline Quat::Quat( float scalar )
+{
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Quat::Quat( floatInVec scalar )
+{
+    mVec128 = scalar.get128();
+}
+
+inline Quat::Quat( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Quat Quat::identity( )
+{
+    return Quat( _VECTORMATH_UNIT_0001 );
+}
+
+inline const Quat lerp( float t, Quat quat0, Quat quat1 )
+{
+    return lerp( floatInVec(t), quat0, quat1 );
+}
+
+inline const Quat lerp( floatInVec t, Quat quat0, Quat quat1 )
+{
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 )
+{
+    return slerp( floatInVec(t), unitQuat0, unitQuat1 );
+}
+
+inline const Quat slerp( floatInVec t, Quat unitQuat0, Quat unitQuat1 )
+{
+    Quat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
+    cosAngle = vec_splat( cosAngle, 0 );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), cosAngle );
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
+    start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( angles, oneMinusT );
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) );
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Quat( vec_madd( start.get128(), scale0, vec_madd( unitQuat1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+}
+
+inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
+{
+    return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );
+}
+
+inline const Quat squad( floatInVec t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
+{
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 );
+    tmp1 = slerp( t, unitQuat1, unitQuat2 );
+    return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), tmp0, tmp1 );
+}
+
+inline vec_float4 Quat::get128( ) const
+{
+    return mVec128;
+}
+
+inline Quat & Quat::operator =( Quat quat )
+{
+    mVec128 = quat.mVec128;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( Vector3 vec )
+{
+    mVec128 = vec_sel( vec.get128(), mVec128, _VECTORMATH_MASK_0x000F );
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{
+    return Vector3( mVec128 );
+}
+
+inline Quat & Quat::setX( float _x )
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Quat & Quat::setX( floatInVec _x )
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Quat::getX( ) const
+{
+    return floatInVec( mVec128, 0 );
+}
+
+inline Quat & Quat::setY( float _y )
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Quat & Quat::setY( floatInVec _y )
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Quat::getY( ) const
+{
+    return floatInVec( mVec128, 1 );
+}
+
+inline Quat & Quat::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Quat & Quat::setZ( floatInVec _z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Quat::getZ( ) const
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Quat & Quat::setW( float _w )
+{
+    _vmathVfSetElement(mVec128, _w, 3);
+    return *this;
+}
+
+inline Quat & Quat::setW( floatInVec _w )
+{
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+    return *this;
+}
+
+inline const floatInVec Quat::getW( ) const
+{
+    return floatInVec( mVec128, 3 );
+}
+
+inline Quat & Quat::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Quat & Quat::setElem( int idx, floatInVec value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Quat::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Quat::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Quat::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline const Quat Quat::operator +( Quat quat ) const
+{
+    return Quat( vec_add( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator -( Quat quat ) const
+{
+    return Quat( vec_sub( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator *( float scalar ) const
+{
+    return *this * floatInVec(scalar);
+}
+
+inline const Quat Quat::operator *( floatInVec scalar ) const
+{
+    return Quat( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline Quat & Quat::operator +=( Quat quat )
+{
+    *this = *this + quat;
+    return *this;
+}
+
+inline Quat & Quat::operator -=( Quat quat )
+{
+    *this = *this - quat;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( floatInVec scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator /( float scalar ) const
+{
+    return *this / floatInVec(scalar);
+}
+
+inline const Quat Quat::operator /( floatInVec scalar ) const
+{
+    return Quat( divf4( mVec128, scalar.get128() ) );
+}
+
+inline Quat & Quat::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline Quat & Quat::operator /=( floatInVec scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const
+{
+    return Quat( negatef4( mVec128 ) );
+}
+
+inline const Quat operator *( float scalar, Quat quat )
+{
+    return floatInVec(scalar) * quat;
+}
+
+inline const Quat operator *( floatInVec scalar, Quat quat )
+{
+    return quat * scalar;
+}
+
+inline const floatInVec dot( Quat quat0, Quat quat1 )
+{
+    return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
+}
+
+inline const floatInVec norm( Quat quat )
+{
+    return floatInVec(  _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
+}
+
+inline const floatInVec length( Quat quat )
+{
+    return floatInVec(  sqrtf4(_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
+}
+
+inline const Quat normalize( Quat quat )
+{
+    vec_float4 dot = _vmathVfDot4( quat.get128(), quat.get128() );
+    return Quat( vec_madd( quat.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Quat Quat::rotation( Vector3 unitVec0, Vector3 unitVec1 )
+{
+    Vector3 crossVec;
+    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    cosAngle = vec_splat( cosAngle, 0 );
+    cosAngleX2Plus2 = vec_madd( cosAngle, ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){2.0f,2.0f,2.0f,2.0f}) );
+    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
+    cosHalfAngleX2 = vec_madd( recipCosHalfAngleX2, cosAngleX2Plus2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    crossVec = cross( unitVec0, unitVec1 );
+    res = vec_madd( crossVec.get128(), recipCosHalfAngleX2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    res = vec_sel( res, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::rotation( float radians, Vector3 unitVec )
+{
+    return rotation( floatInVec(radians), unitVec );
+}
+
+inline const Quat Quat::rotation( floatInVec radians, Vector3 unitVec )
+{
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    res = vec_sel( vec_madd( unitVec.get128(), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c, _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationX( float radians )
+{
+    return rotationX( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationX( floatInVec radians )
+{
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0xF000 );
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationY( float radians )
+{
+    return rotationY( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationY( floatInVec radians )
+{
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x0F00 );
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationZ( float radians )
+{
+    return rotationZ( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationZ( floatInVec radians )
+{
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x00F0 );
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::operator *( Quat quat ) const
+{
+    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = mVec128;
+    rdata = quat.mVec128;
+    tmp0 = vec_perm( ldata, ldata, _VECTORMATH_PERM_YZXW );
+    tmp1 = vec_perm( rdata, rdata, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( ldata, ldata, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( rdata, rdata, _VECTORMATH_PERM_YZXW );
+    qv = vec_madd( vec_splat( ldata, 3 ), rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );
+    qv = vec_madd( tmp0, tmp1, qv );
+    qv = vec_nmsub( tmp2, tmp3, qv );
+    product = vec_madd( ldata, rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    l_wxyz = vec_sld( ldata, ldata, 12 );
+    r_wxyz = vec_sld( rdata, rdata, 12 );
+    qw = vec_nmsub( l_wxyz, r_wxyz, product );
+    xy = vec_madd( l_wxyz, r_wxyz, product );
+    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );
+    return Quat( vec_sel( qv, qw, _VECTORMATH_MASK_0x000F ) );
+}
+
+inline Quat & Quat::operator *=( Quat quat )
+{
+    *this = *this * quat;
+    return *this;
+}
+
+inline const Vector3 rotate( Quat quat, Vector3 vec )
+{
+    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat.get128();
+    vdata = vec.get128();
+    tmp0 = vec_perm( qdata, qdata, _VECTORMATH_PERM_YZXW );
+    tmp1 = vec_perm( vdata, vdata, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( qdata, qdata, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( vdata, vdata, _VECTORMATH_PERM_YZXW );
+    wwww = vec_splat( qdata, 3 );
+    qv = vec_madd( wwww, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qv = vec_madd( tmp0, tmp1, qv );
+    qv = vec_nmsub( tmp2, tmp3, qv );
+    product = vec_madd( qdata, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
+    qw = vec_add( vec_sld( product, product, 8 ), qw );
+    tmp1 = vec_perm( qv, qv, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( qv, qv, _VECTORMATH_PERM_YZXW );
+    res = vec_madd( vec_splat( qw, 0 ), qdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    res = vec_madd( wwww, qv, res );
+    res = vec_madd( tmp0, tmp1, res );
+    res = vec_nmsub( tmp2, tmp3, res );
+    return Vector3( res );
+}
+
+inline const Quat conj( Quat quat )
+{
+    return Quat( vec_xor( quat.get128(), ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ) );
+}
+
+inline const Quat select( Quat quat0, Quat quat1, bool select1 )
+{
+    return select( quat0, quat1, boolInVec(select1) );
+}
+
+inline const Quat select( Quat quat0, Quat quat1, boolInVec select1 )
+{
+    return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Quat quat )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = quat.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+inline void print( Quat quat, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = quat.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_soa.h
index 082eaed26..fb83e8121 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_soa.h
@@ -1,479 +1,479 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_CPP_H
-#define _VECTORMATH_QUAT_SOA_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Soa {
-
-inline Quat::Quat( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-}
-
-inline Quat::Quat( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Quat::Quat( const Vector3 & xyz, vec_float4 _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Quat::Quat( const Vector4 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = vec.getW();
-}
-
-inline Quat::Quat( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline Quat::Quat( Aos::Quat quat )
-{
-    vec_float4 vec128 = quat.get128();
-    mX = vec_splat( vec128, 0 );
-    mY = vec_splat( vec128, 1 );
-    mZ = vec_splat( vec128, 2 );
-    mW = vec_splat( vec128, 3 );
-}
-
-inline Quat::Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( quat0.get128(), quat2.get128() );
-    tmp1 = vec_mergeh( quat1.get128(), quat3.get128() );
-    tmp2 = vec_mergel( quat0.get128(), quat2.get128() );
-    tmp3 = vec_mergel( quat1.get128(), quat3.get128() );
-    mX = vec_mergeh( tmp0, tmp1 );
-    mY = vec_mergel( tmp0, tmp1 );
-    mZ = vec_mergeh( tmp2, tmp3 );
-    mW = vec_mergel( tmp2, tmp3 );
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 )
-{
-    Quat start;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitQuat0, unitQuat1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){0.0f,0.0f,0.0f,0.0f}, cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start.setX( vec_sel( unitQuat0.getX(), negatef4( unitQuat0.getX() ), selectMask ) );
-    start.setY( vec_sel( unitQuat0.getY(), negatef4( unitQuat0.getY() ), selectMask ) );
-    start.setZ( vec_sel( unitQuat0.getZ(), negatef4( unitQuat0.getZ() ), selectMask ) );
-    start.setW( vec_sel( unitQuat0.getW(), negatef4( unitQuat0.getW() ), selectMask ) );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
-}
-
-inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( vec_madd( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), t, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), tmp0, tmp1 );
-}
-
-inline void Quat::get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( mX, mZ );
-    tmp1 = vec_mergeh( mY, mW );
-    tmp2 = vec_mergel( mX, mZ );
-    tmp3 = vec_mergel( mY, mW );
-    result0 = Aos::Quat( vec_mergeh( tmp0, tmp1 ) );
-    result1 = Aos::Quat( vec_mergel( tmp0, tmp1 ) );
-    result2 = Aos::Quat( vec_mergeh( tmp2, tmp3 ) );
-    result3 = Aos::Quat( vec_mergel( tmp2, tmp3 ) );
-}
-
-inline Quat & Quat::operator =( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Quat & Quat::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Quat::getX( ) const
-{
-    return mX;
-}
-
-inline Quat & Quat::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Quat::getY( ) const
-{
-    return mY;
-}
-
-inline Quat & Quat::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Quat::getZ( ) const
-{
-    return mZ;
-}
-
-inline Quat & Quat::setW( vec_float4 _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline vec_float4 Quat::getW( ) const
-{
-    return mW;
-}
-
-inline Quat & Quat::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Quat::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Quat::vec_float4_t & Quat::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Quat::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Quat Quat::operator +( const Quat & quat ) const
-{
-    return Quat(
-        vec_add( mX, quat.mX ),
-        vec_add( mY, quat.mY ),
-        vec_add( mZ, quat.mZ ),
-        vec_add( mW, quat.mW )
-    );
-}
-
-inline const Quat Quat::operator -( const Quat & quat ) const
-{
-    return Quat(
-        vec_sub( mX, quat.mX ),
-        vec_sub( mY, quat.mY ),
-        vec_sub( mZ, quat.mZ ),
-        vec_sub( mW, quat.mW )
-    );
-}
-
-inline const Quat Quat::operator *( vec_float4 scalar ) const
-{
-    return Quat(
-        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mW, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline Quat & Quat::operator +=( const Quat & quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( const Quat & quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( vec_float4 scalar ) const
-{
-    return Quat(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar ),
-        divf4( mW, scalar )
-    );
-}
-
-inline Quat & Quat::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ ),
-        negatef4( mW )
-    );
-}
-
-inline const Quat operator *( vec_float4 scalar, const Quat & quat )
-{
-    return quat * scalar;
-}
-
-inline vec_float4 dot( const Quat & quat0, const Quat & quat1 )
-{
-    vec_float4 result;
-    result = vec_madd( quat0.getX(), quat1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( quat0.getY(), quat1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat0.getZ(), quat1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat0.getW(), quat1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 norm( const Quat & quat )
-{
-    vec_float4 result;
-    result = vec_madd( quat.getX(), quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( quat.getY(), quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat.getZ(), quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat.getW(), quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 length( const Quat & quat )
-{
-    return sqrtf4( norm( quat ) );
-}
-
-inline const Quat normalize( const Quat & quat )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = norm( quat );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    return Quat(
-        vec_madd( quat.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( quat.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( quat.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( quat.getW(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf4( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), vec_add( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( unitVec0, unitVec1 ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    recipCosHalfAngleX2 = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), cosHalfAngleX2 );
-    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Quat Quat::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    return Quat( ( unitVec * s ), c );
-}
-
-inline const Quat Quat::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    return Quat( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-inline const Quat Quat::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-inline const Quat Quat::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, c );
-}
-
-inline const Quat Quat::operator *( const Quat & quat ) const
-{
-    return Quat(
-        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mX, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mY, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mX, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mZ, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mX, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_sub( vec_sub( vec_madd( mW, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mX, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline Quat & Quat::operator *=( const Quat & quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = vec_sub( vec_add( vec_madd( quat.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_sub( vec_add( vec_madd( quat.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_sub( vec_add( vec_madd( quat.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpW = vec_add( vec_add( vec_madd( quat.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return Vector3(
-        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpX, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpY, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpZ, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Quat conj( const Quat & quat )
-{
-    return Quat( negatef4( quat.getX() ), negatef4( quat.getY() ), negatef4( quat.getZ() ), quat.getW() );
-}
-
-inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 )
-{
-    return Quat(
-        vec_sel( quat0.getX(), quat1.getX(), select1 ),
-        vec_sel( quat0.getY(), quat1.getY(), select1 ),
-        vec_sel( quat0.getZ(), quat1.getZ(), select1 ),
-        vec_sel( quat0.getW(), quat1.getW(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Quat & quat )
-{
-    Aos::Quat vec0, vec1, vec2, vec3;
-    quat.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Quat & quat, const char * name )
-{
-    Aos::Quat vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    quat.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_CPP_H
+#define _VECTORMATH_QUAT_SOA_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Soa {
+
+inline Quat::Quat( const Quat & quat )
+{
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+}
+
+inline Quat::Quat( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Quat::Quat( const Vector3 & xyz, vec_float4 _w )
+{
+    this->setXYZ( xyz );
+    this->setW( _w );
+}
+
+inline Quat::Quat( const Vector4 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = vec.getW();
+}
+
+inline Quat::Quat( vec_float4 scalar )
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline Quat::Quat( Aos::Quat quat )
+{
+    vec_float4 vec128 = quat.get128();
+    mX = vec_splat( vec128, 0 );
+    mY = vec_splat( vec128, 1 );
+    mZ = vec_splat( vec128, 2 );
+    mW = vec_splat( vec128, 3 );
+}
+
+inline Quat::Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( quat0.get128(), quat2.get128() );
+    tmp1 = vec_mergeh( quat1.get128(), quat3.get128() );
+    tmp2 = vec_mergel( quat0.get128(), quat2.get128() );
+    tmp3 = vec_mergel( quat1.get128(), quat3.get128() );
+    mX = vec_mergeh( tmp0, tmp1 );
+    mY = vec_mergel( tmp0, tmp1 );
+    mZ = vec_mergeh( tmp2, tmp3 );
+    mW = vec_mergel( tmp2, tmp3 );
+}
+
+inline const Quat Quat::identity( )
+{
+    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 )
+{
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 )
+{
+    Quat start;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitQuat0, unitQuat1 );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){0.0f,0.0f,0.0f,0.0f}, cosAngle );
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
+    start.setX( vec_sel( unitQuat0.getX(), negatef4( unitQuat0.getX() ), selectMask ) );
+    start.setY( vec_sel( unitQuat0.getY(), negatef4( unitQuat0.getY() ), selectMask ) );
+    start.setZ( vec_sel( unitQuat0.getZ(), negatef4( unitQuat0.getZ() ), selectMask ) );
+    start.setW( vec_sel( unitQuat0.getW(), negatef4( unitQuat0.getW() ), selectMask ) );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
+}
+
+inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
+{
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 );
+    tmp1 = slerp( t, unitQuat1, unitQuat2 );
+    return slerp( vec_madd( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), t, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), tmp0, tmp1 );
+}
+
+inline void Quat::get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( mX, mZ );
+    tmp1 = vec_mergeh( mY, mW );
+    tmp2 = vec_mergel( mX, mZ );
+    tmp3 = vec_mergel( mY, mW );
+    result0 = Aos::Quat( vec_mergeh( tmp0, tmp1 ) );
+    result1 = Aos::Quat( vec_mergel( tmp0, tmp1 ) );
+    result2 = Aos::Quat( vec_mergeh( tmp2, tmp3 ) );
+    result3 = Aos::Quat( vec_mergel( tmp2, tmp3 ) );
+}
+
+inline Quat & Quat::operator =( const Quat & quat )
+{
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( const Vector3 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{
+    return Vector3( mX, mY, mZ );
+}
+
+inline Quat & Quat::setX( vec_float4 _x )
+{
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Quat::getX( ) const
+{
+    return mX;
+}
+
+inline Quat & Quat::setY( vec_float4 _y )
+{
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Quat::getY( ) const
+{
+    return mY;
+}
+
+inline Quat & Quat::setZ( vec_float4 _z )
+{
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Quat::getZ( ) const
+{
+    return mZ;
+}
+
+inline Quat & Quat::setW( vec_float4 _w )
+{
+    mW = _w;
+    return *this;
+}
+
+inline vec_float4 Quat::getW( ) const
+{
+    return mW;
+}
+
+inline Quat & Quat::setElem( int idx, vec_float4 value )
+{
+    *(&mX + idx) = value;
+    return *this;
+}
+
+inline vec_float4 Quat::getElem( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline Quat::vec_float4_t & Quat::operator []( int idx )
+{
+    return *(&mX + idx);
+}
+
+inline vec_float4 Quat::operator []( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline const Quat Quat::operator +( const Quat & quat ) const
+{
+    return Quat(
+        vec_add( mX, quat.mX ),
+        vec_add( mY, quat.mY ),
+        vec_add( mZ, quat.mZ ),
+        vec_add( mW, quat.mW )
+    );
+}
+
+inline const Quat Quat::operator -( const Quat & quat ) const
+{
+    return Quat(
+        vec_sub( mX, quat.mX ),
+        vec_sub( mY, quat.mY ),
+        vec_sub( mZ, quat.mZ ),
+        vec_sub( mW, quat.mW )
+    );
+}
+
+inline const Quat Quat::operator *( vec_float4 scalar ) const
+{
+    return Quat(
+        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mW, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline Quat & Quat::operator +=( const Quat & quat )
+{
+    *this = *this + quat;
+    return *this;
+}
+
+inline Quat & Quat::operator -=( const Quat & quat )
+{
+    *this = *this - quat;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( vec_float4 scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator /( vec_float4 scalar ) const
+{
+    return Quat(
+        divf4( mX, scalar ),
+        divf4( mY, scalar ),
+        divf4( mZ, scalar ),
+        divf4( mW, scalar )
+    );
+}
+
+inline Quat & Quat::operator /=( vec_float4 scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const
+{
+    return Quat(
+        negatef4( mX ),
+        negatef4( mY ),
+        negatef4( mZ ),
+        negatef4( mW )
+    );
+}
+
+inline const Quat operator *( vec_float4 scalar, const Quat & quat )
+{
+    return quat * scalar;
+}
+
+inline vec_float4 dot( const Quat & quat0, const Quat & quat1 )
+{
+    vec_float4 result;
+    result = vec_madd( quat0.getX(), quat1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( quat0.getY(), quat1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( quat0.getZ(), quat1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( quat0.getW(), quat1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+inline vec_float4 norm( const Quat & quat )
+{
+    vec_float4 result;
+    result = vec_madd( quat.getX(), quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( quat.getY(), quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( quat.getZ(), quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( quat.getW(), quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+inline vec_float4 length( const Quat & quat )
+{
+    return sqrtf4( norm( quat ) );
+}
+
+inline const Quat normalize( const Quat & quat )
+{
+    vec_float4 lenSqr, lenInv;
+    lenSqr = norm( quat );
+    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
+    return Quat(
+        vec_madd( quat.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( quat.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( quat.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( quat.getW(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
+    cosHalfAngleX2 = sqrtf4( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), vec_add( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( unitVec0, unitVec1 ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    recipCosHalfAngleX2 = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), cosHalfAngleX2 );
+    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Quat Quat::rotation( vec_float4 radians, const Vector3 & unitVec )
+{
+    vec_float4 s, c, angle;
+    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    return Quat( ( unitVec * s ), c );
+}
+
+inline const Quat Quat::rotationX( vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    return Quat( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
+}
+
+inline const Quat Quat::rotationY( vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
+}
+
+inline const Quat Quat::rotationZ( vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, c );
+}
+
+inline const Quat Quat::operator *( const Quat & quat ) const
+{
+    return Quat(
+        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mX, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mY, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mX, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mZ, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mX, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_sub( vec_sub( vec_sub( vec_madd( mW, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mX, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline Quat & Quat::operator *=( const Quat & quat )
+{
+    *this = *this * quat;
+    return *this;
+}
+
+inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
+{
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = vec_sub( vec_add( vec_madd( quat.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmpY = vec_sub( vec_add( vec_madd( quat.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmpZ = vec_sub( vec_add( vec_madd( quat.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmpW = vec_add( vec_add( vec_madd( quat.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return Vector3(
+        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpX, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpY, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpZ, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline const Quat conj( const Quat & quat )
+{
+    return Quat( negatef4( quat.getX() ), negatef4( quat.getY() ), negatef4( quat.getZ() ), quat.getW() );
+}
+
+inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 )
+{
+    return Quat(
+        vec_sel( quat0.getX(), quat1.getX(), select1 ),
+        vec_sel( quat0.getY(), quat1.getY(), select1 ),
+        vec_sel( quat0.getZ(), quat1.getZ(), select1 ),
+        vec_sel( quat0.getW(), quat1.getW(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat & quat )
+{
+    Aos::Quat vec0, vec1, vec2, vec3;
+    quat.get4Aos( vec0, vec1, vec2, vec3 );
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+inline void print( const Quat & quat, const char * name )
+{
+    Aos::Quat vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    quat.get4Aos( vec0, vec1, vec2, vec3 );
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_aos.h
index ef5689e99..dc1f9849b 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_aos.h
@@ -1,1492 +1,1492 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_CPP_H
-#define _VECTORMATH_VEC_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-// for permutes words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
-#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
-#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
-#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
-#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
-#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
-#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
-#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
-    return vec_madd( vec_sld( vec0, vec0, 8 ), vec_sld( vec1, vec1, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
-    return vec_add( vec_sld( result, result, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( vec0, vec0, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( vec1, vec1, _VECTORMATH_PERM_YZXW );
-    result = vec_madd( tmp0, tmp1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_nmsub( tmp2, tmp3, result );
-    return result;
-}
-
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
-{
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = (vec_uint4){0x00007c00u,0x00007c00u,0x00007c00u,0x00007c00u};
-    const vec_uint4 mergeMant = (vec_uint4){0x000003ffu,0x000003ffu,0x000003ffu,0x000003ffu};
-    const vec_uint4 mergeSign = (vec_uint4){0x00008000u,0x00008000u,0x00008000u,0x00008000u};
-
-    sign = vec_sr((vec_uint4)v, (vec_uint4){16,16,16,16});
-    mant = vec_sr((vec_uint4)v, (vec_uint4){13,13,13,13});
-    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4){23,23,23,23}), (vec_int4){0xff,0xff,0xff,0xff});
-
-    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4){112,112,112,112});
-    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4){142,142,142,142});
-
-    bexp = vec_add(bexp, (vec_int4){-112,-112,-112,-112});
-    bexp = vec_sl(bexp, (vec_uint4){10,10,10,10});
-
-    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = vec_sel((vec_uint4){0,0,0,0}, hfloat, notZero);
-    hfloat = vec_sel(hfloat, hfloatInf, isInf);
-    hfloat = vec_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
-{
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
-}
-
-#ifndef __GNUC__
-#define __builtin_constant_p(x) 0
-#endif
-
-static inline vec_float4 _vmathVfInsert(vec_float4 dst, vec_float4 src, int slot)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(slot)) {
-        dst = vec_sld(dst, dst, slot<<2);
-        dst = vec_sld(dst, src, 4);
-        if (slot != 3) dst = vec_sld(dst, dst, (3-slot)<<2);
-        return dst;
-    } else
-#endif
-    {
-        vec_uchar16 shiftpattern = vec_lvsr( 0, (float *)(size_t)(slot<<2) );
-        vec_uint4 selectmask = (vec_uint4)vec_perm( (vec_uint4){0,0,0,0}, _VECTORMATH_MASK_0xF000, shiftpattern );
-        return vec_sel( dst, src, selectmask );
-    }
-}
-
-#define _vmathVfGetElement(vec, slot) ((float *)&(vec))[slot]
-#ifdef _VECTORMATH_SET_CONSTS_IN_MEM
-#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
-#else
-#define _vmathVfSetElement(vec, scalar, slot)                                            \
-{                                                                                        \
-    if (__builtin_constant_p(scalar)) {                                                  \
-        (vec) = _vmathVfInsert(vec, (vec_float4){scalar, scalar, scalar, scalar}, slot); \
-    } else {                                                                             \
-        ((float *)&(vec))[slot] = scalar;                                                \
-    }                                                                                    \
-}
-#endif
-
-static inline vec_float4 _vmathVfSplatScalar(float scalar)
-{
-    vec_float4 result;
-    if (__builtin_constant_p(scalar)) {
-        result = (vec_float4){scalar, scalar, scalar, scalar};
-    } else {
-        result = vec_ld(0, &scalar);
-        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
-    } 
-    return result;
-}
-
-static inline vec_uint4 _vmathVuiSplatScalar(unsigned int scalar)
-{
-    vec_uint4 result;
-    if (__builtin_constant_p(scalar)) {
-        result = (vec_uint4){scalar, scalar, scalar, scalar};
-    } else {
-        result = vec_ld(0, &scalar);
-        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
-    } 
-    return result;
-}
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline VecIdx::operator floatInVec() const
-{
-    return floatInVec(ref, i);
-}
-
-inline float VecIdx::getAsFloat() const
-#else
-inline VecIdx::operator float() const
-#endif
-{
-    return _vmathVfGetElement(ref, i);
-}
-
-inline float VecIdx::operator =( float scalar )
-{
-    _vmathVfSetElement(ref, scalar, i);
-    return scalar;
-}
-
-inline floatInVec VecIdx::operator =( floatInVec scalar )
-{
-    ref = _vmathVfInsert(ref, scalar.get128(), i);
-    return scalar;
-}
-
-inline floatInVec VecIdx::operator =( const VecIdx& scalar )
-{
-    return *this = floatInVec(scalar.ref, scalar.i);
-}
-
-inline floatInVec VecIdx::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator *=( floatInVec scalar )
-{
-    return *this = floatInVec(ref, i) * scalar;
-}
-
-inline floatInVec VecIdx::operator /=( float scalar )
-{
-    return *this /= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator /=( floatInVec scalar )
-{
-    return *this = floatInVec(ref, i) / scalar;
-}
-
-inline floatInVec VecIdx::operator +=( float scalar )
-{
-    return *this += floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator +=( floatInVec scalar )
-{
-    return *this = floatInVec(ref, i) + scalar;
-}
-
-inline floatInVec VecIdx::operator -=( float scalar )
-{
-    return *this -= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator -=( floatInVec scalar )
-{
-    return *this = floatInVec(ref, i) - scalar;
-}
-
-inline Vector3::Vector3( float _x, float _y, float _z )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
-        mVec128 = (vec_float4){_x, _y, _z, 0.0f};
-    } else {
-        float *pf = (float *)&mVec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-    }
-}
-
-inline Vector3::Vector3( floatInVec _x, floatInVec _y, floatInVec _z )
-{
-    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
-    mVec128 = vec_mergeh( xz, _y.get128() );
-}
-
-inline Vector3::Vector3( Point3 pnt )
-{
-    mVec128 = pnt.get128();
-}
-
-inline Vector3::Vector3( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Vector3::Vector3( floatInVec scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Vector3::Vector3( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
-{
-    return lerp( floatInVec(t), vec0, vec1 );
-}
-
-inline const Vector3 lerp( floatInVec t, Vector3 vec0, Vector3 vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
-{
-    return slerp( floatInVec(t), unitVec0, unitVec1 );
-}
-
-inline const Vector3 slerp( floatInVec t, Vector3 unitVec0, Vector3 unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Vector3( vec_madd( unitVec0.get128(), scale0, vec_madd( unitVec1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-}
-
-inline vec_float4 Vector3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( Vector3 vec, vec_float4 * quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
-    dstVec = vec_sel(vec.get128(), dstVec, mask);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = vec_sld( xyzx, yzxy, 12 );
-    xyz2 = vec_sld( yzxy, zxyz, 8 );
-    xyz3 = vec_sld( zxyz, zxyz, 4 );
-    vec0 = Vector3( xyzx );
-    vec1 = Vector3( xyz1 );
-    vec2 = Vector3( xyz2 );
-    vec3 = Vector3( xyz3 );
-}
-
-inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = vec_perm( vec0.get128(), vec1.get128(), _VECTORMATH_PERM_XYZA );
-    yzxy = vec_perm( vec1.get128(), vec2.get128(), _VECTORMATH_PERM_YZAB );
-    zxyz = vec_perm( vec2.get128(), vec3.get128(), _VECTORMATH_PERM_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Vector3 & Vector3::operator =( Vector3 vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( floatInVec _x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Vector3::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Vector3 & Vector3::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Vector3 & Vector3::setY( floatInVec _y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Vector3::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Vector3 & Vector3::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Vector3 & Vector3::setZ( floatInVec _z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Vector3::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Vector3 & Vector3::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Vector3 & Vector3::setElem( int idx, floatInVec value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Vector3::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Vector3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Vector3::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector3 Vector3::operator +( Vector3 vec ) const
-{
-    return Vector3( vec_add( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector3 Vector3::operator -( Vector3 vec ) const
-{
-    return Vector3( vec_sub( mVec128, vec.mVec128 ) );
-}
-
-inline const Point3 Vector3::operator +( Point3 pnt ) const
-{
-    return Point3( vec_add( mVec128, pnt.get128() ) );
-}
-
-inline const Vector3 Vector3::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Vector3 Vector3::operator *( floatInVec scalar ) const
-{
-    return Vector3( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline Vector3 & Vector3::operator +=( Vector3 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( Vector3 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Vector3 Vector3::operator /( floatInVec scalar ) const
-{
-    return Vector3( divf4( mVec128, scalar.get128() ) );
-}
-
-inline Vector3 & Vector3::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator /=( floatInVec scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3( negatef4( mVec128 ) );
-}
-
-inline const Vector3 operator *( float scalar, Vector3 vec )
-{
-    return floatInVec(scalar) * vec;
-}
-
-inline const Vector3 operator *( floatInVec scalar, Vector3 vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( vec_madd( vec0.get128(), vec1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( divf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 recipPerElem( Vector3 vec )
-{
-    return Vector3( recipf4( vec.get128() ) );
-}
-
-inline const Vector3 sqrtPerElem( Vector3 vec )
-{
-    return Vector3( sqrtf4( vec.get128() ) );
-}
-
-inline const Vector3 rsqrtPerElem( Vector3 vec )
-{
-    return Vector3( rsqrtf4( vec.get128() ) );
-}
-
-inline const Vector3 absPerElem( Vector3 vec )
-{
-    return Vector3( fabsf4( vec.get128() ) );
-}
-
-inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec maxElem( Vector3 vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = fmaxf4( vec_splat( vec.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec minElem( Vector3 vec )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = fminf4( vec_splat( vec.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec sum( Vector3 vec )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = vec_add( vec_splat( vec.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec dot( Vector3 vec0, Vector3 vec1 )
-{
-    return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline const floatInVec lengthSqr( Vector3 vec )
-{
-    return floatInVec(  _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
-}
-
-inline const floatInVec length( Vector3 vec )
-{
-    return floatInVec(  sqrtf4(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
-}
-
-inline const Vector3 normalize( Vector3 vec )
-{
-    vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
-    dot = vec_splat( dot, 0 );
-    return Vector3( vec_madd( vec.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
-{
-    return select( vec0, vec1, boolInVec(select1) );
-}
-
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, boolInVec select1 )
-{
-    return Vector3( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Vector3 vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( Vector3 vec, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-inline Vector4::Vector4( float _x, float _y, float _z, float _w )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
-        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
-        mVec128 = (vec_float4){_x, _y, _z, _w};
-    } else {
-        float *pf = (float *)&mVec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-        pf[3] = _w;
-    }
-}
-
-inline Vector4::Vector4( floatInVec _x, floatInVec _y, floatInVec _z, floatInVec _w )
-{
-    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
-    vec_float4 yw = vec_mergeh( _y.get128(), _w.get128() );
-    mVec128 = vec_mergeh( xz, yw );
-}
-
-inline Vector4::Vector4( Vector3 xyz, float _w )
-{
-    mVec128 = xyz.get128();
-    _vmathVfSetElement(mVec128, _w, 3);
-}
-
-inline Vector4::Vector4( Vector3 xyz, floatInVec _w )
-{
-    mVec128 = xyz.get128();
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-}
-
-inline Vector4::Vector4( Vector3 vec )
-{
-    mVec128 = vec.get128();
-    mVec128 = _vmathVfInsert(mVec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), 3);
-}
-
-inline Vector4::Vector4( Point3 pnt )
-{
-    mVec128 = pnt.get128();
-    mVec128 = _vmathVfInsert(mVec128, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), 3);
-}
-
-inline Vector4::Vector4( Quat quat )
-{
-    mVec128 = quat.get128();
-}
-
-inline Vector4::Vector4( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Vector4::Vector4( floatInVec scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Vector4::Vector4( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
-{
-    return lerp( floatInVec(t), vec0, vec1 );
-}
-
-inline const Vector4 lerp( floatInVec t, Vector4 vec0, Vector4 vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 )
-{
-    return slerp( floatInVec(t), unitVec0, unitVec1 );
-}
-
-inline const Vector4 slerp( floatInVec t, Vector4 unitVec0, Vector4 unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Vector4( vec_madd( unitVec0.get128(), scale0, vec_madd( unitVec1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-}
-
-inline vec_float4 Vector4::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
-}
-
-inline Vector4 & Vector4::operator =( Vector4 vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( Vector3 vec )
-{
-    mVec128 = vec_sel( vec.get128(), mVec128, _VECTORMATH_MASK_0x000F );
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Vector4 & Vector4::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Vector4 & Vector4::setX( floatInVec _x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Vector4::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Vector4 & Vector4::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Vector4 & Vector4::setY( floatInVec _y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Vector4::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Vector4 & Vector4::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Vector4 & Vector4::setZ( floatInVec _z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Vector4::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Vector4 & Vector4::setW( float _w )
-{
-    _vmathVfSetElement(mVec128, _w, 3);
-    return *this;
-}
-
-inline Vector4 & Vector4::setW( floatInVec _w )
-{
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-    return *this;
-}
-
-inline const floatInVec Vector4::getW( ) const
-{
-    return floatInVec( mVec128, 3 );
-}
-
-inline Vector4 & Vector4::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Vector4 & Vector4::setElem( int idx, floatInVec value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Vector4::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Vector4::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Vector4::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector4 Vector4::operator +( Vector4 vec ) const
-{
-    return Vector4( vec_add( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator -( Vector4 vec ) const
-{
-    return Vector4( vec_sub( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Vector4 Vector4::operator *( floatInVec scalar ) const
-{
-    return Vector4( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline Vector4 & Vector4::operator +=( Vector4 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( Vector4 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Vector4 Vector4::operator /( floatInVec scalar ) const
-{
-    return Vector4( divf4( mVec128, scalar.get128() ) );
-}
-
-inline Vector4 & Vector4::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator /=( floatInVec scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4( negatef4( mVec128 ) );
-}
-
-inline const Vector4 operator *( float scalar, Vector4 vec )
-{
-    return floatInVec(scalar) * vec;
-}
-
-inline const Vector4 operator *( floatInVec scalar, Vector4 vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( vec_madd( vec0.get128(), vec1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( divf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 recipPerElem( Vector4 vec )
-{
-    return Vector4( recipf4( vec.get128() ) );
-}
-
-inline const Vector4 sqrtPerElem( Vector4 vec )
-{
-    return Vector4( sqrtf4( vec.get128() ) );
-}
-
-inline const Vector4 rsqrtPerElem( Vector4 vec )
-{
-    return Vector4( rsqrtf4( vec.get128() ) );
-}
-
-inline const Vector4 absPerElem( Vector4 vec )
-{
-    return Vector4( fabsf4( vec.get128() ) );
-}
-
-inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec maxElem( Vector4 vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = fmaxf4( vec_splat( vec.get128(), 2 ), result );
-    result = fmaxf4( vec_splat( vec.get128(), 3 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec minElem( Vector4 vec )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = fminf4( vec_splat( vec.get128(), 2 ), result );
-    result = fminf4( vec_splat( vec.get128(), 3 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec sum( Vector4 vec )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = vec_add( vec_splat( vec.get128(), 2 ), result );
-    result = vec_add( vec_splat( vec.get128(), 3 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec dot( Vector4 vec0, Vector4 vec1 )
-{
-    return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline const floatInVec lengthSqr( Vector4 vec )
-{
-    return floatInVec(  _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
-}
-
-inline const floatInVec length( Vector4 vec )
-{
-    return floatInVec(  sqrtf4(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
-}
-
-inline const Vector4 normalize( Vector4 vec )
-{
-    vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() );
-    return Vector4( vec_madd( vec.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 )
-{
-    return select( vec0, vec1, boolInVec(select1) );
-}
-
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, boolInVec select1 )
-{
-    return Vector4( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Vector4 vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( Vector4 vec, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-inline Point3::Point3( float _x, float _y, float _z )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
-        mVec128 = (vec_float4){_x, _y, _z, 0.0f};
-    } else {
-        float *pf = (float *)&mVec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-    }
-}
-
-inline Point3::Point3( floatInVec _x, floatInVec _y, floatInVec _z )
-{
-    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
-    mVec128 = vec_mergeh( xz, _y.get128() );
-}
-
-inline Point3::Point3( Vector3 vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Point3::Point3( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Point3::Point3( floatInVec scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Point3::Point3( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
-{
-    return lerp( floatInVec(t), pnt0, pnt1 );
-}
-
-inline const Point3 lerp( floatInVec t, Point3 pnt0, Point3 pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline vec_float4 Point3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( Point3 pnt, vec_float4 * quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
-    dstVec = vec_sel(pnt.get128(), dstVec, mask);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = vec_sld( xyzx, yzxy, 12 );
-    xyz2 = vec_sld( yzxy, zxyz, 8 );
-    xyz3 = vec_sld( zxyz, zxyz, 4 );
-    pnt0 = Point3( xyzx );
-    pnt1 = Point3( xyz1 );
-    pnt2 = Point3( xyz2 );
-    pnt3 = Point3( xyz3 );
-}
-
-inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = vec_perm( pnt0.get128(), pnt1.get128(), _VECTORMATH_PERM_XYZA );
-    yzxy = vec_perm( pnt1.get128(), pnt2.get128(), _VECTORMATH_PERM_YZAB );
-    zxyz = vec_perm( pnt2.get128(), pnt3.get128(), _VECTORMATH_PERM_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Point3 & Point3::operator =( Point3 pnt )
-{
-    mVec128 = pnt.mVec128;
-    return *this;
-}
-
-inline Point3 & Point3::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Point3 & Point3::setX( floatInVec _x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Point3::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Point3 & Point3::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Point3 & Point3::setY( floatInVec _y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Point3::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Point3 & Point3::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Point3 & Point3::setZ( floatInVec _z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Point3::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Point3 & Point3::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Point3 & Point3::setElem( int idx, floatInVec value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Point3::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Point3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Point3::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector3 Point3::operator -( Point3 pnt ) const
-{
-    return Vector3( vec_sub( mVec128, pnt.mVec128 ) );
-}
-
-inline const Point3 Point3::operator +( Vector3 vec ) const
-{
-    return Point3( vec_add( mVec128, vec.get128() ) );
-}
-
-inline const Point3 Point3::operator -( Vector3 vec ) const
-{
-    return Point3( vec_sub( mVec128, vec.get128() ) );
-}
-
-inline Point3 & Point3::operator +=( Vector3 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( Vector3 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( vec_madd( pnt0.get128(), pnt1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 recipPerElem( Point3 pnt )
-{
-    return Point3( recipf4( pnt.get128() ) );
-}
-
-inline const Point3 sqrtPerElem( Point3 pnt )
-{
-    return Point3( sqrtf4( pnt.get128() ) );
-}
-
-inline const Point3 rsqrtPerElem( Point3 pnt )
-{
-    return Point3( rsqrtf4( pnt.get128() ) );
-}
-
-inline const Point3 absPerElem( Point3 pnt )
-{
-    return Point3( fabsf4( pnt.get128() ) );
-}
-
-inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const floatInVec maxElem( Point3 pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( pnt.get128(), 1 ), pnt.get128() );
-    result = fmaxf4( vec_splat( pnt.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const floatInVec minElem( Point3 pnt )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( pnt.get128(), 1 ), pnt.get128() );
-    result = fminf4( vec_splat( pnt.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec sum( Point3 pnt )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( pnt.get128(), 1 ), pnt.get128() );
-    result = vec_add( vec_splat( pnt.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const Point3 scale( Point3 pnt, float scaleVal )
-{
-    return scale( pnt, floatInVec( scaleVal ) );
-}
-
-inline const Point3 scale( Point3 pnt, floatInVec scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline const floatInVec projection( Point3 pnt, Vector3 unitVec )
-{
-    return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
-}
-
-inline const floatInVec distSqrFromOrigin( Point3 pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline const floatInVec distFromOrigin( Point3 pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline const floatInVec distSqr( Point3 pnt0, Point3 pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline const floatInVec dist( Point3 pnt0, Point3 pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 )
-{
-    return select( pnt0, pnt1, boolInVec(select1) );
-}
-
-inline const Point3 select( Point3 pnt0, Point3 pnt1, boolInVec select1 )
-{
-    return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Point3 pnt )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( Point3 pnt, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+// For permutes, the words of the first operand are labeled [x,y,z,w] and the words of the second operand [a,b,c,d].
+
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+// 3-component dot product built from three chained multiply-adds.
+// Word 0 of the result is x0*x1 + y0*y1 + z0*z1.  The remaining words hold
+// rotated sums that also involve w, so callers should only read word 0.
+static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
+{
+    // Plain per-word products.
+    vec_float4 acc = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
+
+    // Rotate both operands left by one word (y moves to word 0) and accumulate.
+    vec_float4 rotA = vec_sld( vec0, vec0, 4 );
+    vec_float4 rotB = vec_sld( vec1, vec1, 4 );
+    acc = vec_madd( rotA, rotB, acc );
+
+    // Rotate by two words (z moves to word 0) and accumulate.
+    rotA = vec_sld( vec0, vec0, 8 );
+    rotB = vec_sld( vec1, vec1, 8 );
+    return vec_madd( rotA, rotB, acc );
+}
+
+// 4-component dot product.  Every word of the result holds
+// x0*x1 + y0*y1 + z0*z1 + w0*w1.
+static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
+{
+    // Per-word products.
+    vec_float4 pairSums = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
+
+    // Add the products of the operands rotated left by one word:
+    // each word now holds the sum of two neighbouring products.
+    vec_float4 rotA = vec_sld( vec0, vec0, 4 );
+    vec_float4 rotB = vec_sld( vec1, vec1, 4 );
+    pairSums = vec_madd( rotA, rotB, pairSums );
+
+    // Adding the pair sums rotated by two words completes the 4-term sum.
+    vec_float4 swapped = vec_sld( pairSums, pairSums, 8 );
+    return vec_add( swapped, pairSums );
+}
+
+// 3-component cross product of vec0 and vec1:
+//   vec0.yzx * vec1.zxy - vec0.zxy * vec1.yzx
+// Both permutes leave w in place, so word 3 of the result is w0*w1 - w0*w1.
+static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
+{
+    const vec_float4 yzx0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_YZXW );
+    const vec_float4 zxy1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_ZXYW );
+    const vec_float4 zxy0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_ZXYW );
+    const vec_float4 yzx1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_YZXW );
+
+    // Positive term first, then subtract the second product from it
+    // (vec_nmsub(a, b, c) yields c - a*b).
+    const vec_float4 positive = vec_madd( yzx0, zxy1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
+    return vec_nmsub( zxy0, yzx1, positive );
+}
+
+// Converts four 32-bit floats to 16-bit half-float bit patterns, one per
+// 32-bit word (the pattern sits in the low 16 bits of each word).
+//  - The mantissa is truncated to its top 10 bits; no rounding is applied.
+//  - Inputs with a biased exponent <= 112 (too small for a normal half)
+//    become signed zero; half denormals are not generated.
+//  - Inputs with a biased exponent > 142 become signed infinity.  This
+//    includes float infinities and NaNs.
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
+{
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    // Half-float infinity pattern and the bit masks for the mantissa / sign fields.
+    const vec_uint4 hfloatInf = (vec_uint4){0x00007c00u,0x00007c00u,0x00007c00u,0x00007c00u};
+    const vec_uint4 mergeMant = (vec_uint4){0x000003ffu,0x000003ffu,0x000003ffu,0x000003ffu};
+    const vec_uint4 mergeSign = (vec_uint4){0x00008000u,0x00008000u,0x00008000u,0x00008000u};
+
+    // Shift the float fields into their half-float positions:
+    // sign bit 31 -> bit 15, top mantissa bits -> bits 0..9, exponent -> bits 0..7.
+    sign = vec_sr((vec_uint4)v, (vec_uint4){16,16,16,16});
+    mant = vec_sr((vec_uint4)v, (vec_uint4){13,13,13,13});
+    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4){23,23,23,23}), (vec_int4){0xff,0xff,0xff,0xff});
+
+    // Classify by biased exponent before it is rebased.
+    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4){112,112,112,112});
+    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4){142,142,142,142});
+
+    // Rebias from float (127) to half (15), then move into the exponent field.
+    bexp = vec_add(bexp, (vec_int4){-112,-112,-112,-112});
+    bexp = vec_sl(bexp, (vec_uint4){10,10,10,10});
+
+    // Assemble: exponent | mantissa, then apply the zero and infinity
+    // overrides, and finally merge in the sign bit.
+    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
+    hfloat = vec_sel((vec_uint4){0,0,0,0}, hfloat, notZero);
+    hfloat = vec_sel(hfloat, hfloatInf, isInf);
+    hfloat = vec_sel(hfloat, sign, mergeSign);
+
+    return hfloat;
+}
+
+// Converts eight floats (u then v) to half floats and packs them into one
+// quadword: halfwords 0..3 come from u, halfwords 4..7 from v.
+static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
+{
+    // Byte indices 2,3 of every 32-bit word, first operand then second
+    // (indices >= 16 address the second operand of vec_perm).
+    const vec_uchar16 lowHalves = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
+    const vec_uint4 halfU = _vmathVfToHalfFloatsUnpacked(u);
+    const vec_uint4 halfV = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)vec_perm(halfU, halfV, lowHalves);
+}
+
+// Compilers that do not define __GNUC__ get a stub that always evaluates to 0,
+// so every "is this a compile-time constant?" test below takes its run-time branch.
+#ifndef __GNUC__
+#define __builtin_constant_p(x) 0
+#endif
+
+// Returns dst with word `slot` replaced by a word taken from src.
+// NOTE(review): the two paths read different words of src.  The constant-slot
+// path inserts src word 0, the variable-slot path inserts src word `slot`.
+// They agree when src holds the same value in every word (presumably what
+// floatInVec::get128() provides -- confirm against floatInVec).
+static inline vec_float4 _vmathVfInsert(vec_float4 dst, vec_float4 src, int slot)
+{
+#ifdef __GNUC__
+    if (__builtin_constant_p(slot)) {
+        // Rotate the target word to position 0, shift it out while shifting
+        // src word 0 in at position 3, then rotate back into place.
+        dst = vec_sld(dst, dst, slot<<2);
+        dst = vec_sld(dst, src, 4);
+        if (slot != 3) dst = vec_sld(dst, dst, (3-slot)<<2);
+        return dst;
+    } else
+#endif
+    {
+        // Build a select mask with all ones in word `slot` by shifting the
+        // word-0 mask right by slot*4 bytes; the fake address only supplies
+        // the byte offset to vec_lvsr and is never dereferenced here.
+        vec_uchar16 shiftpattern = vec_lvsr( 0, (float *)(size_t)(slot<<2) );
+        vec_uint4 selectmask = (vec_uint4)vec_perm( (vec_uint4){0,0,0,0}, _VECTORMATH_MASK_0xF000, shiftpattern );
+        return vec_sel( dst, src, selectmask );
+    }
+}
+
+#define _vmathVfGetElement(vec, slot) ((float *)&(vec))[slot]
+#ifdef _VECTORMATH_SET_CONSTS_IN_MEM
+#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
+#else
+#define _vmathVfSetElement(vec, scalar, slot)                                            \
+{                                                                                        \
+    if (__builtin_constant_p(scalar)) {                                                  \
+        (vec) = _vmathVfInsert(vec, (vec_float4){scalar, scalar, scalar, scalar}, slot); \
+    } else {                                                                             \
+        ((float *)&(vec))[slot] = scalar;                                                \
+    }                                                                                    \
+}
+#endif
+
+// Returns a vector with `scalar` replicated into all four words.
+static inline vec_float4 _vmathVfSplatScalar(float scalar)
+{
+    if (!__builtin_constant_p(scalar)) {
+        // Load the aligned quadword containing the scalar, rotate the scalar
+        // to word 0 using its address alignment, then replicate word 0.
+        vec_float4 loaded = vec_ld(0, &scalar);
+        loaded = vec_perm(loaded, loaded, vec_lvsl(0, &scalar));
+        return vec_splat(loaded, 0);
+    }
+    // Compile-time constant: emit a vector literal directly.
+    return (vec_float4){scalar, scalar, scalar, scalar};
+}
+
+// Returns a vector with the unsigned `scalar` replicated into all four words.
+static inline vec_uint4 _vmathVuiSplatScalar(unsigned int scalar)
+{
+    if (!__builtin_constant_p(scalar)) {
+        // Load the aligned quadword containing the scalar, rotate the scalar
+        // to word 0 using its address alignment, then replicate word 0.
+        vec_uint4 loaded = vec_ld(0, &scalar);
+        loaded = vec_perm(loaded, loaded, vec_lvsl(0, &scalar));
+        return vec_splat(loaded, 0);
+    }
+    // Compile-time constant: emit a vector literal directly.
+    return (vec_uint4){scalar, scalar, scalar, scalar};
+}
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+// Read access to the referenced element.
+// With _VECTORMATH_NO_SCALAR_CAST defined, VecIdx converts implicitly to
+// floatInVec and the float read is exposed as getAsFloat(); otherwise VecIdx
+// converts implicitly to float.  The preprocessor selects only the function
+// header: the final body below is shared by getAsFloat() and operator float().
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline VecIdx::operator floatInVec() const
+{
+    return floatInVec(ref, i);
+}
+
+inline float VecIdx::getAsFloat() const
+#else
+inline VecIdx::operator float() const
+#endif
+{
+    return _vmathVfGetElement(ref, i);
+}
+
+// Writes a float into element i of the referenced vector and returns the value written.
+inline float VecIdx::operator =( float scalar )
+{
+    _vmathVfSetElement(ref, scalar, i);
+    return scalar;
+}
+
+// Inserts a floatInVec into element i of the referenced vector and returns the value written.
+inline floatInVec VecIdx::operator =( floatInVec scalar )
+{
+    ref = _vmathVfInsert(ref, scalar.get128(), i);
+    return scalar;
+}
+
+// Copies the element referenced by another VecIdx into this element.
+// The value is routed through the floatInVec assignment, so this copies the
+// element value rather than rebinding the index object.
+inline floatInVec VecIdx::operator =( const VecIdx& scalar )
+{
+    return *this = floatInVec(scalar.ref, scalar.i);
+}
+
+// Multiplies the referenced element by a float; forwards to the floatInVec overload.
+inline floatInVec VecIdx::operator *=( float scalar )
+{
+    return *this *= floatInVec(scalar);
+}
+
+// Reads the referenced element, multiplies it by scalar, writes it back and returns the new value.
+inline floatInVec VecIdx::operator *=( floatInVec scalar )
+{
+    return *this = floatInVec(ref, i) * scalar;
+}
+
+// Divides the referenced element by a float; forwards to the floatInVec overload.
+inline floatInVec VecIdx::operator /=( float scalar )
+{
+    return *this /= floatInVec(scalar);
+}
+
+// Reads the referenced element, divides it by scalar, writes it back and returns the new value.
+inline floatInVec VecIdx::operator /=( floatInVec scalar )
+{
+    return *this = floatInVec(ref, i) / scalar;
+}
+
+// Adds a float to the referenced element; forwards to the floatInVec overload.
+inline floatInVec VecIdx::operator +=( float scalar )
+{
+    return *this += floatInVec(scalar);
+}
+
+// Reads the referenced element, adds scalar, writes it back and returns the new value.
+inline floatInVec VecIdx::operator +=( floatInVec scalar )
+{
+    return *this = floatInVec(ref, i) + scalar;
+}
+
+// Subtracts a float from the referenced element; forwards to the floatInVec overload.
+inline floatInVec VecIdx::operator -=( float scalar )
+{
+    return *this -= floatInVec(scalar);
+}
+
+// Reads the referenced element, subtracts scalar, writes it back and returns the new value.
+inline floatInVec VecIdx::operator -=( floatInVec scalar )
+{
+    return *this = floatInVec(ref, i) - scalar;
+}
+
+// Constructs a vector from three floats.
+// When all three are compile-time constants a vector literal is used and the
+// fourth word is 0.0f.  Otherwise x, y, z are stored through a float pointer
+// and the fourth word is not written.
+inline Vector3::Vector3( float _x, float _y, float _z )
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
+        mVec128 = (vec_float4){_x, _y, _z, 0.0f};
+    } else {
+        float *pf = (float *)&mVec128;
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+    }
+}
+
+// Constructs a vector from three floatInVec values using two high merges.
+inline Vector3::Vector3( floatInVec _x, floatInVec _y, floatInVec _z )
+{
+    // Interleave x with z first, so that the second merge drops y between
+    // them: word 0 = x, word 1 = y, word 2 = z.
+    const vec_float4 xAndZ = vec_mergeh( _x.get128(), _z.get128() );
+    mVec128 = vec_mergeh( xAndZ, _y.get128() );
+}
+
+// Constructs a vector by copying a point's raw quadword.
+inline Vector3::Vector3( Point3 pnt )
+{
+    mVec128 = pnt.get128();
+}
+
+// Constructs a vector from a single float by way of floatInVec's quadword.
+inline Vector3::Vector3( float scalar )
+{
+    mVec128 = floatInVec(scalar).get128();
+}
+
+// Constructs a vector by taking the floatInVec's quadword as-is.
+inline Vector3::Vector3( floatInVec scalar )
+{
+    mVec128 = scalar.get128();
+}
+
+// Constructs a vector directly from a raw quadword.
+inline Vector3::Vector3( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+// Returns the unit x axis, (1, 0, 0) with a zero fourth word.
+inline const Vector3 Vector3::xAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_1000 );
+}
+
+// Returns the unit y axis, (0, 1, 0) with a zero fourth word.
+inline const Vector3 Vector3::yAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_0100 );
+}
+
+// Returns the unit z axis, (0, 0, 1) with a zero fourth word.
+inline const Vector3 Vector3::zAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_0010 );
+}
+
+// Linear interpolation with a float parameter; forwards to the floatInVec overload.
+inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
+{
+    return lerp( floatInVec(t), vec0, vec1 );
+}
+
+// Linear interpolation: vec0 + (vec1 - vec0) * t.  t is not clamped.
+inline const Vector3 lerp( floatInVec t, Vector3 vec0, Vector3 vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+// Spherical interpolation with a float parameter; forwards to the floatInVec overload.
+inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
+{
+    return slerp( floatInVec(t), unitVec0, unitVec1 );
+}
+
+// Spherical linear interpolation between two vectors (named as unit vectors;
+// the code does not normalize them).
+// The result is unitVec0 * scale0 + unitVec1 * scale1.  While the cosine of
+// the angle is below _VECTORMATH_SLERP_TOL the weights are
+//   scale0 = sin((1-t)*angle) / sin(angle),  scale1 = sin(t*angle) / sin(angle).
+// At or above the tolerance it falls back to the linear weights (1-t) and t.
+inline const Vector3 slerp( floatInVec t, Vector3 unitVec0, Vector3 unitVec1 )
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    // Only word 0 of the dot-product result is the 3-term sum; replicate it.
+    cosAngle = vec_splat( cosAngle, 0 );
+    // All ones where the spherical weights should be used.
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    // The two merges build the factors [ 1, 1-t, t, 1-t ], so a single sinf4
+    // call yields sin(angle), sin((1-t)*angle) and sin(t*angle).
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( angles, oneMinusT );
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sines = sinf4( angles );
+    // Divide every sine by sin(angle), which sits in word 0.
+    scales = divf4( sines, vec_splat( sines, 0 ) );
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Vector3( vec_madd( unitVec0.get128(), scale0, vec_madd( unitVec1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+}
+
+// Returns the underlying raw quadword.
+inline vec_float4 Vector3::get128( ) const
+{
+    return mVec128;
+}
+
+// Stores x, y, z of vec into *quad while preserving the fourth word already in *quad.
+inline void storeXYZ( Vector3 vec, vec_float4 * quad )
+{
+    // The mask is all ones in word 3 only, so word 3 is taken from the
+    // existing destination and words 0..2 from vec.
+    const vec_uint4 keepW = _VECTORMATH_MASK_0x000F;
+    const vec_float4 previous = *quad;
+    *quad = vec_sel( vec.get128(), previous, keepW );
+}
+
+// Loads four vectors from twelve consecutive floats (x0 y0 z0 x1 | y1 z1 x2 y2 |
+// z2 x3 y3 z3) stored in three quadwords.  The fourth word of each output
+// holds whatever float follows in the stream (or wraps, for vec3).
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
+{
+    // Read all inputs before writing any output.
+    const vec_float4 quadA = threeQuads[0];
+    const vec_float4 quadB = threeQuads[1];
+    const vec_float4 quadC = threeQuads[2];
+
+    // Byte shifts across neighbouring quadwords line each xyz triple up at word 0.
+    const vec_float4 second = vec_sld( quadA, quadB, 12 );
+    const vec_float4 third  = vec_sld( quadB, quadC, 8 );
+    const vec_float4 fourth = vec_sld( quadC, quadC, 4 );
+
+    vec0 = Vector3( quadA );
+    vec1 = Vector3( second );
+    vec2 = Vector3( third );
+    vec3 = Vector3( fourth );
+}
+
+// Packs the x, y, z of four vectors into twelve consecutive floats
+// (x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3) written to three quadwords.
+inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
+{
+    const vec_float4 v0 = vec0.get128();
+    const vec_float4 v1 = vec1.get128();
+    const vec_float4 v2 = vec2.get128();
+    const vec_float4 v3 = vec3.get128();
+
+    // Each permute stitches the tail of one vector to the head of the next.
+    threeQuads[0] = vec_perm( v0, v1, _VECTORMATH_PERM_XYZA );
+    threeQuads[1] = vec_perm( v1, v2, _VECTORMATH_PERM_YZAB );
+    threeQuads[2] = vec_perm( v2, v3, _VECTORMATH_PERM_ZABC );
+}
+
+// Stores the x, y, z of eight vectors as 24 half floats in three quadwords.
+inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
+{
+    // Pack each group of four vectors into twelve consecutive floats first.
+    vec_float4 firstFour[3];
+    vec_float4 lastFour[3];
+    storeXYZArray( vec0, vec1, vec2, vec3, firstFour );
+    storeXYZArray( vec4, vec5, vec6, vec7, lastFour );
+
+    // Then convert the six float quadwords pairwise, in stream order.
+    threeQuads[0] = _vmath2VfToHalfFloats(firstFour[0], firstFour[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(firstFour[2], lastFour[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(lastFour[1], lastFour[2]);
+}
+
+// Copies the raw quadword from vec and returns *this.
+inline Vector3 & Vector3::operator =( Vector3 vec )
+{
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+// Sets word 0 (x) from a float and returns *this.
+inline Vector3 & Vector3::setX( float _x )
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+// Sets word 0 (x) from a floatInVec and returns *this.
+inline Vector3 & Vector3::setX( floatInVec _x )
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+// Returns x as a floatInVec built from word 0 of the quadword.
+inline const floatInVec Vector3::getX( ) const
+{
+    return floatInVec( mVec128, 0 );
+}
+
+// Sets word 1 (y) from a float and returns *this.
+inline Vector3 & Vector3::setY( float _y )
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+// Sets word 1 (y) from a floatInVec and returns *this.
+inline Vector3 & Vector3::setY( floatInVec _y )
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+// Returns y as a floatInVec built from word 1 of the quadword.
+inline const floatInVec Vector3::getY( ) const
+{
+    return floatInVec( mVec128, 1 );
+}
+
+// Sets word 2 (z) from a float and returns *this.
+inline Vector3 & Vector3::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+// Sets word 2 (z) from a floatInVec and returns *this.
+inline Vector3 & Vector3::setZ( floatInVec _z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+// Returns z as a floatInVec built from word 2 of the quadword.
+inline const floatInVec Vector3::getZ( ) const
+{
+    return floatInVec( mVec128, 2 );
+}
+
+// Sets the element at index idx from a float and returns *this.
+// idx is not range-checked.
+inline Vector3 & Vector3::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+// Sets the element at index idx from a floatInVec and returns *this.
+// idx is not range-checked.
+inline Vector3 & Vector3::setElem( int idx, floatInVec value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+// Returns the element at index idx as a floatInVec.  idx is not range-checked.
+inline const floatInVec Vector3::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+// Mutable subscript: returns a VecIdx proxy bound to this vector's quadword and idx.
+inline VecIdx Vector3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+// Read-only subscript: returns the element at idx as a floatInVec.
+inline const floatInVec Vector3::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+// Per-word sum of two vectors.
+inline const Vector3 Vector3::operator +( Vector3 vec ) const
+{
+    return Vector3( vec_add( mVec128, vec.mVec128 ) );
+}
+
+// Per-word difference of two vectors (this minus vec).
+inline const Vector3 Vector3::operator -( Vector3 vec ) const
+{
+    return Vector3( vec_sub( mVec128, vec.mVec128 ) );
+}
+
+// Adds this vector to a point, yielding a point.
+inline const Point3 Vector3::operator +( Point3 pnt ) const
+{
+    return Point3( vec_add( mVec128, pnt.get128() ) );
+}
+
+// Scales the vector by a float; forwards to the floatInVec overload.
+inline const Vector3 Vector3::operator *( float scalar ) const
+{
+    return *this * floatInVec(scalar);
+}
+
+// Multiplies each word by the matching word of scalar's quadword
+// (a multiply-add with a zero addend).
+inline const Vector3 Vector3::operator *( floatInVec scalar ) const
+{
+    return Vector3( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+// In-place addition, implemented via operator +; returns *this.
+inline Vector3 & Vector3::operator +=( Vector3 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+// In-place subtraction, implemented via operator -; returns *this.
+inline Vector3 & Vector3::operator -=( Vector3 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+// In-place scale by a float, implemented via operator *; returns *this.
+inline Vector3 & Vector3::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+// In-place scale by a floatInVec, implemented via operator *; returns *this.
+inline Vector3 & Vector3::operator *=( floatInVec scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+// Divides the vector by a float; forwards to the floatInVec overload.
+inline const Vector3 Vector3::operator /( float scalar ) const
+{
+    return *this / floatInVec(scalar);
+}
+
+// Divides each word by the matching word of scalar's quadword using divf4.
+inline const Vector3 Vector3::operator /( floatInVec scalar ) const
+{
+    return Vector3( divf4( mVec128, scalar.get128() ) );
+}
+
+// In-place division by a float, implemented via operator /; returns *this.
+inline Vector3 & Vector3::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+// In-place division by a floatInVec, implemented via operator /; returns *this.
+inline Vector3 & Vector3::operator /=( floatInVec scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+// Unary minus: negates the quadword with negatef4.
+inline const Vector3 Vector3::operator -( ) const
+{
+    return Vector3( negatef4( mVec128 ) );
+}
+
+// Scalar-on-the-left multiplication by a float; forwards to the floatInVec overload.
+inline const Vector3 operator *( float scalar, Vector3 vec )
+{
+    return floatInVec(scalar) * vec;
+}
+
+// Scalar-on-the-left multiplication; same as vec * scalar.
+inline const Vector3 operator *( floatInVec scalar, Vector3 vec )
+{
+    return vec * scalar;
+}
+
+// Per-element product of two vectors (a multiply-add with a zero addend).
+inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( vec_madd( vec0.get128(), vec1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+// Per-element quotient vec0 / vec1, computed with divf4.
+inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( divf4( vec0.get128(), vec1.get128() ) );
+}
+
+// Per-element reciprocal, computed with recipf4.
+inline const Vector3 recipPerElem( Vector3 vec )
+{
+    return Vector3( recipf4( vec.get128() ) );
+}
+
+// Per-element square root, computed with sqrtf4.
+inline const Vector3 sqrtPerElem( Vector3 vec )
+{
+    return Vector3( sqrtf4( vec.get128() ) );
+}
+
+// Per-element reciprocal square root, computed with rsqrtf4.
+inline const Vector3 rsqrtPerElem( Vector3 vec )
+{
+    return Vector3( rsqrtf4( vec.get128() ) );
+}
+
+// Per-element absolute value, computed with fabsf4.
+inline const Vector3 absPerElem( Vector3 vec )
+{
+    return Vector3( fabsf4( vec.get128() ) );
+}
+
+// Per-element copysign via copysignf4( vec0, vec1 ).
+// NOTE(review): presumably magnitude from vec0 and sign from vec1 -- confirm
+// against the copysignf4 definition.
+inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
+}
+
+// Per-element maximum of two vectors, computed with fmaxf4.
+inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
+}
+
+// Returns the largest of x, y and z.
+inline const floatInVec maxElem( Vector3 vec )
+{
+    const vec_float4 v = vec.get128();
+    // Word 0 after the first step is max(y, x); the second step folds in z.
+    const vec_float4 maxXY = fmaxf4( vec_splat( v, 1 ), v );
+    const vec_float4 maxXYZ = fmaxf4( vec_splat( v, 2 ), maxXY );
+    return floatInVec( maxXYZ, 0 );
+}
+
+// Per-element minimum of two vectors, computed with fminf4.
+inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
+}
+
+// Returns the smallest of x, y and z.
+inline const floatInVec minElem( Vector3 vec )
+{
+    const vec_float4 v = vec.get128();
+    // Word 0 after the first step is min(y, x); the second step folds in z.
+    const vec_float4 minXY = fminf4( vec_splat( v, 1 ), v );
+    const vec_float4 minXYZ = fminf4( vec_splat( v, 2 ), minXY );
+    return floatInVec( minXYZ, 0 );
+}
+
+// Returns x + y + z.
+inline const floatInVec sum( Vector3 vec )
+{
+    const vec_float4 v = vec.get128();
+    // Word 0 after the first step is y + x; the second step adds z.
+    const vec_float4 sumXY = vec_add( vec_splat( v, 1 ), v );
+    const vec_float4 sumXYZ = vec_add( vec_splat( v, 2 ), sumXY );
+    return floatInVec( sumXYZ, 0 );
+}
+
+// 3-component dot product, taken from word 0 of _vmathVfDot3's result.
+inline const floatInVec dot( Vector3 vec0, Vector3 vec1 )
+{
+    return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
+}
+
+// Squared length: the 3-component dot product of vec with itself.
+inline const floatInVec lengthSqr( Vector3 vec )
+{
+    return floatInVec(  _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
+}
+
+// Length: sqrtf4 of the 3-component dot product of vec with itself.
+inline const floatInVec length( Vector3 vec )
+{
+    return floatInVec(  sqrtf4(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
+}
+
+inline const Vector3 normalize( Vector3 vec )
+{
+    vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
+    dot = vec_splat( dot, 0 );
+    return Vector3( vec_madd( vec.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
+{
+    return select( vec0, vec1, boolInVec(select1) );
+}
+
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, boolInVec select1 )
+{
+    return Vector3( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Vector3 vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+inline void print( Vector3 vec, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+inline Vector4::Vector4( float _x, float _y, float _z, float _w )
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
+        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
+        mVec128 = (vec_float4){_x, _y, _z, _w};
+    } else {
+        float *pf = (float *)&mVec128;
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+        pf[3] = _w;
+    }
+}
+
+inline Vector4::Vector4( floatInVec _x, floatInVec _y, floatInVec _z, floatInVec _w )
+{
+    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
+    vec_float4 yw = vec_mergeh( _y.get128(), _w.get128() );
+    mVec128 = vec_mergeh( xz, yw );
+}
+
+inline Vector4::Vector4( Vector3 xyz, float _w )
+{
+    mVec128 = xyz.get128();
+    _vmathVfSetElement(mVec128, _w, 3);
+}
+
+inline Vector4::Vector4( Vector3 xyz, floatInVec _w )
+{
+    mVec128 = xyz.get128();
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+}
+
+inline Vector4::Vector4( Vector3 vec )
+{
+    mVec128 = vec.get128();
+    mVec128 = _vmathVfInsert(mVec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), 3);
+}
+
+inline Vector4::Vector4( Point3 pnt )
+{
+    mVec128 = pnt.get128();
+    mVec128 = _vmathVfInsert(mVec128, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), 3);
+}
+
+inline Vector4::Vector4( Quat quat )
+{
+    mVec128 = quat.get128();
+}
+
+inline Vector4::Vector4( float scalar )
+{
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Vector4::Vector4( floatInVec scalar )
+{
+    mVec128 = scalar.get128();
+}
+
+inline Vector4::Vector4( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Vector4 Vector4::xAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_1000 );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0100 );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0010 );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0001 );
+}
+
+inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
+{
+    return lerp( floatInVec(t), vec0, vec1 );
+}
+
+inline const Vector4 lerp( floatInVec t, Vector4 vec0, Vector4 vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 )
+{
+    return slerp( floatInVec(t), unitVec0, unitVec1 );
+}
+
+inline const Vector4 slerp( floatInVec t, Vector4 unitVec0, Vector4 unitVec1 )
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
+    cosAngle = vec_splat( cosAngle, 0 );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( angles, oneMinusT );
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) );
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Vector4( vec_madd( unitVec0.get128(), scale0, vec_madd( unitVec1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+}
+
+inline vec_float4 Vector4::get128( ) const
+{
+    return mVec128;
+}
+
+inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads )
+{
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
+}
+
+inline Vector4 & Vector4::operator =( Vector4 vec )
+{
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( Vector3 vec )
+{
+    mVec128 = vec_sel( vec.get128(), mVec128, _VECTORMATH_MASK_0x000F );
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{
+    return Vector3( mVec128 );
+}
+
+inline Vector4 & Vector4::setX( float _x )
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Vector4 & Vector4::setX( floatInVec _x )
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Vector4::getX( ) const
+{
+    return floatInVec( mVec128, 0 );
+}
+
+inline Vector4 & Vector4::setY( float _y )
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Vector4 & Vector4::setY( floatInVec _y )
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Vector4::getY( ) const
+{
+    return floatInVec( mVec128, 1 );
+}
+
+inline Vector4 & Vector4::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Vector4 & Vector4::setZ( floatInVec _z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Vector4::getZ( ) const
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Vector4 & Vector4::setW( float _w )
+{
+    _vmathVfSetElement(mVec128, _w, 3);
+    return *this;
+}
+
+inline Vector4 & Vector4::setW( floatInVec _w )
+{
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+    return *this;
+}
+
+inline const floatInVec Vector4::getW( ) const
+{
+    return floatInVec( mVec128, 3 );
+}
+
+inline Vector4 & Vector4::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Vector4 & Vector4::setElem( int idx, floatInVec value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Vector4::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Vector4::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Vector4::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline const Vector4 Vector4::operator +( Vector4 vec ) const
+{
+    return Vector4( vec_add( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator -( Vector4 vec ) const
+{
+    return Vector4( vec_sub( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator *( float scalar ) const
+{
+    return *this * floatInVec(scalar);
+}
+
+inline const Vector4 Vector4::operator *( floatInVec scalar ) const
+{
+    return Vector4( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline Vector4 & Vector4::operator +=( Vector4 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator -=( Vector4 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( floatInVec scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator /( float scalar ) const
+{
+    return *this / floatInVec(scalar);
+}
+
+inline const Vector4 Vector4::operator /( floatInVec scalar ) const
+{
+    return Vector4( divf4( mVec128, scalar.get128() ) );
+}
+
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator /=( floatInVec scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator -( ) const
+{
+    return Vector4( negatef4( mVec128 ) );
+}
+
+inline const Vector4 operator *( float scalar, Vector4 vec )
+{
+    return floatInVec(scalar) * vec;
+}
+
+inline const Vector4 operator *( floatInVec scalar, Vector4 vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( vec_madd( vec0.get128(), vec1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( divf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 recipPerElem( Vector4 vec )
+{
+    return Vector4( recipf4( vec.get128() ) );
+}
+
+inline const Vector4 sqrtPerElem( Vector4 vec )
+{
+    return Vector4( sqrtf4( vec.get128() ) );
+}
+
+inline const Vector4 rsqrtPerElem( Vector4 vec )
+{
+    return Vector4( rsqrtf4( vec.get128() ) );
+}
+
+inline const Vector4 absPerElem( Vector4 vec )
+{
+    return Vector4( fabsf4( vec.get128() ) );
+}
+
+inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const floatInVec maxElem( Vector4 vec )
+{
+    vec_float4 result;
+    result = fmaxf4( vec_splat( vec.get128(), 1 ), vec.get128() );
+    result = fmaxf4( vec_splat( vec.get128(), 2 ), result );
+    result = fmaxf4( vec_splat( vec.get128(), 3 ), result );
+    return floatInVec( result, 0 );
+}
+
+inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const floatInVec minElem( Vector4 vec )
+{
+    vec_float4 result;
+    result = fminf4( vec_splat( vec.get128(), 1 ), vec.get128() );
+    result = fminf4( vec_splat( vec.get128(), 2 ), result );
+    result = fminf4( vec_splat( vec.get128(), 3 ), result );
+    return floatInVec( result, 0 );
+}
+
+inline const floatInVec sum( Vector4 vec )
+{
+    vec_float4 result;
+    result = vec_add( vec_splat( vec.get128(), 1 ), vec.get128() );
+    result = vec_add( vec_splat( vec.get128(), 2 ), result );
+    result = vec_add( vec_splat( vec.get128(), 3 ), result );
+    return floatInVec( result, 0 );
+}
+
+inline const floatInVec dot( Vector4 vec0, Vector4 vec1 )
+{
+    return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
+}
+
+inline const floatInVec lengthSqr( Vector4 vec )
+{
+    return floatInVec(  _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
+}
+
+inline const floatInVec length( Vector4 vec )
+{
+    return floatInVec(  sqrtf4(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
+}
+
+inline const Vector4 normalize( Vector4 vec )
+{
+    vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() );
+    return Vector4( vec_madd( vec.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 )
+{
+    return select( vec0, vec1, boolInVec(select1) );
+}
+
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, boolInVec select1 )
+{
+    return Vector4( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Vector4 vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+inline void print( Vector4 vec, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+inline Point3::Point3( float _x, float _y, float _z )
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
+        mVec128 = (vec_float4){_x, _y, _z, 0.0f};
+    } else {
+        float *pf = (float *)&mVec128;
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+    }
+}
+
+inline Point3::Point3( floatInVec _x, floatInVec _y, floatInVec _z )
+{
+    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
+    mVec128 = vec_mergeh( xz, _y.get128() );
+}
+
+inline Point3::Point3( Vector3 vec )
+{
+    mVec128 = vec.get128();
+}
+
+inline Point3::Point3( float scalar )
+{
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Point3::Point3( floatInVec scalar )
+{
+    mVec128 = scalar.get128();
+}
+
+inline Point3::Point3( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
+{
+    return lerp( floatInVec(t), pnt0, pnt1 );
+}
+
+inline const Point3 lerp( floatInVec t, Point3 pnt0, Point3 pnt1 )
+{
+    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
+}
+
+inline vec_float4 Point3::get128( ) const
+{
+    return mVec128;
+}
+
+inline void storeXYZ( Point3 pnt, vec_float4 * quad )
+{
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
+    dstVec = vec_sel(pnt.get128(), dstVec, mask);
+    *quad = dstVec;
+}
+
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyz1 = vec_sld( xyzx, yzxy, 12 );
+    xyz2 = vec_sld( yzxy, zxyz, 8 );
+    xyz3 = vec_sld( zxyz, zxyz, 4 );
+    pnt0 = Point3( xyzx );
+    pnt1 = Point3( xyz1 );
+    pnt2 = Point3( xyz2 );
+    pnt3 = Point3( xyz3 );
+}
+
+inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = vec_perm( pnt0.get128(), pnt1.get128(), _VECTORMATH_PERM_XYZA );
+    yzxy = vec_perm( pnt1.get128(), pnt2.get128(), _VECTORMATH_PERM_YZAB );
+    zxyz = vec_perm( pnt2.get128(), pnt3.get128(), _VECTORMATH_PERM_ZABC );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
+    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Point3 & Point3::operator =( Point3 pnt )
+{
+    mVec128 = pnt.mVec128;
+    return *this;
+}
+
+inline Point3 & Point3::setX( float _x )
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Point3 & Point3::setX( floatInVec _x )
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Point3::getX( ) const
+{
+    return floatInVec( mVec128, 0 );
+}
+
+inline Point3 & Point3::setY( float _y )
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Point3 & Point3::setY( floatInVec _y )
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Point3::getY( ) const
+{
+    return floatInVec( mVec128, 1 );
+}
+
+inline Point3 & Point3::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Point3 & Point3::setZ( floatInVec _z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Point3::getZ( ) const
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Point3 & Point3::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Point3 & Point3::setElem( int idx, floatInVec value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Point3::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Point3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Point3::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline const Vector3 Point3::operator -( Point3 pnt ) const
+{
+    return Vector3( vec_sub( mVec128, pnt.mVec128 ) );
+}
+
+inline const Point3 Point3::operator +( Vector3 vec ) const
+{
+    return Point3( vec_add( mVec128, vec.get128() ) );
+}
+
+inline const Point3 Point3::operator -( Vector3 vec ) const
+{
+    return Point3( vec_sub( mVec128, vec.get128() ) );
+}
+
+inline Point3 & Point3::operator +=( Vector3 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Point3 & Point3::operator -=( Vector3 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( vec_madd( pnt0.get128(), pnt1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 recipPerElem( Point3 pnt )
+{
+    return Point3( recipf4( pnt.get128() ) );
+}
+
+inline const Point3 sqrtPerElem( Point3 pnt )
+{
+    return Point3( sqrtf4( pnt.get128() ) );
+}
+
+inline const Point3 rsqrtPerElem( Point3 pnt )
+{
+    return Point3( rsqrtf4( pnt.get128() ) );
+}
+
+inline const Point3 absPerElem( Point3 pnt )
+{
+    return Point3( fabsf4( pnt.get128() ) );
+}
+
+inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const floatInVec maxElem( Point3 pnt )
+{
+    vec_float4 result;
+    result = fmaxf4( vec_splat( pnt.get128(), 1 ), pnt.get128() );
+    result = fmaxf4( vec_splat( pnt.get128(), 2 ), result );
+    return floatInVec( result, 0 );
+}
+
+inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const floatInVec minElem( Point3 pnt )
+{
+    vec_float4 result;
+    result = fminf4( vec_splat( pnt.get128(), 1 ), pnt.get128() );
+    result = fminf4( vec_splat( pnt.get128(), 2 ), result );
+    return floatInVec( result, 0 );
+}
+
+inline const floatInVec sum( Point3 pnt )
+{
+    vec_float4 result;
+    result = vec_add( vec_splat( pnt.get128(), 1 ), pnt.get128() );
+    result = vec_add( vec_splat( pnt.get128(), 2 ), result );
+    return floatInVec( result, 0 );
+}
+
+inline const Point3 scale( Point3 pnt, float scaleVal )
+{
+    return scale( pnt, floatInVec( scaleVal ) );
+}
+
+inline const Point3 scale( Point3 pnt, floatInVec scaleVal )
+{
+    return mulPerElem( pnt, Point3( scaleVal ) );
+}
+
+inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
+{
+    return mulPerElem( pnt, Point3( scaleVec ) );
+}
+
+inline const floatInVec projection( Point3 pnt, Vector3 unitVec )
+{
+    return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
+}
+
+inline const floatInVec distSqrFromOrigin( Point3 pnt )
+{
+    return lengthSqr( Vector3( pnt ) );
+}
+
+inline const floatInVec distFromOrigin( Point3 pnt )
+{
+    return length( Vector3( pnt ) );
+}
+
+inline const floatInVec distSqr( Point3 pnt0, Point3 pnt1 )
+{
+    return lengthSqr( ( pnt1 - pnt0 ) );
+}
+
+inline const floatInVec dist( Point3 pnt0, Point3 pnt1 )
+{
+    return length( ( pnt1 - pnt0 ) );
+}
+
+inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 )
+{
+    return select( pnt0, pnt1, boolInVec(select1) );
+}
+
+inline const Point3 select( Point3 pnt0, Point3 pnt1, boolInVec select1 )
+{
+    return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Point3 pnt )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = pnt.get128();
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+inline void print( Point3 pnt, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = pnt.get128();
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_soa.h
index 3f26c9060..89fdfcf7f 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_soa.h
@@ -1,1425 +1,1425 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_CPP_H
-#define _VECTORMATH_VEC_SOA_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-// for permutes, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Soa {
-
-inline Vector3::Vector3( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-}
-
-inline Vector3::Vector3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Vector3::Vector3( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-}
-
-inline Vector3::Vector3( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline Vector3::Vector3( Aos::Vector3 vec )
-{
-    vec_float4 vec128 = vec.get128();
-    mX = vec_splat( vec128, 0 );
-    mY = vec_splat( vec128, 1 );
-    mZ = vec_splat( vec128, 2 );
-}
-
-inline Vector3::Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec0.get128(), vec2.get128() );
-    tmp1 = vec_mergeh( vec1.get128(), vec3.get128() );
-    tmp2 = vec_mergel( vec0.get128(), vec2.get128() );
-    tmp3 = vec_mergel( vec1.get128(), vec3.get128() );
-    mX = vec_mergeh( tmp0, tmp1 );
-    mY = vec_mergel( tmp0, tmp1 );
-    mZ = vec_mergeh( tmp2, tmp3 );
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void Vector3::get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = vec_mergeh( mX, mZ );
-    tmp1 = vec_mergel( mX, mZ );
-    result0 = Aos::Vector3( vec_mergeh( tmp0, mY ) );
-    result1 = Aos::Vector3( vec_perm( tmp0, mY, _VECTORMATH_PERM_ZBWX ) );
-    result2 = Aos::Vector3( vec_perm( tmp1, mY, _VECTORMATH_PERM_XCYX ) );
-    result3 = Aos::Vector3( vec_perm( tmp1, mY, _VECTORMATH_PERM_ZDWX ) );
-}
-
-inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = vec_sld( yzxy, xyzx, 8 );
-    zxzx = vec_sld( xyzx, zxyz, 8 );
-    yzyz = vec_sld( zxyz, yzxy, 8 );
-    vec.setX( vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
-    vec.setY( vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
-    vec.setZ( vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
-}
-
-inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = vec_perm( vec.getX(), vec.getY(), _VECTORMATH_PERM_ZCXA );
-    zxzx = vec_perm( vec.getZ(), vec.getX(), _VECTORMATH_PERM_XBZD );
-    yzyz = vec_perm( vec.getY(), vec.getZ(), _VECTORMATH_PERM_WDYB );
-    xyzx = vec_sld( xyxy, zxzx, 8 );
-    yzxy = vec_sld( yzyz, xyxy, 8 );
-    zxyz = vec_sld( zxzx, yzyz, 8 );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( vec0, xyz0 );
-    storeXYZArray( vec1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Vector3 & Vector3::operator =( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Vector3::getX( ) const
-{
-    return mX;
-}
-
-inline Vector3 & Vector3::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Vector3::getY( ) const
-{
-    return mY;
-}
-
-inline Vector3 & Vector3::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Vector3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector3 & Vector3::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Vector3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Vector3::vec_float4_t & Vector3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Vector3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
-{
-    return Vector3(
-        vec_add( mX, vec.mX ),
-        vec_add( mY, vec.mY ),
-        vec_add( mZ, vec.mZ )
-    );
-}
-
-inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
-{
-    return Vector3(
-        vec_sub( mX, vec.mX ),
-        vec_sub( mY, vec.mY ),
-        vec_sub( mZ, vec.mZ )
-    );
-}
-
-inline const Point3 Vector3::operator +( const Point3 & pnt ) const
-{
-    return Point3(
-        vec_add( mX, pnt.getX() ),
-        vec_add( mY, pnt.getY() ),
-        vec_add( mZ, pnt.getZ() )
-    );
-}
-
-inline const Vector3 Vector3::operator *( vec_float4 scalar ) const
-{
-    return Vector3(
-        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline Vector3 & Vector3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( vec_float4 scalar ) const
-{
-    return Vector3(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ )
-    );
-}
-
-inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        divf4( vec0.getX(), vec1.getX() ),
-        divf4( vec0.getY(), vec1.getY() ),
-        divf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 recipPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getX() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getY() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getZ() )
-    );
-}
-
-inline const Vector3 sqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        sqrtf4( vec.getX() ),
-        sqrtf4( vec.getY() ),
-        sqrtf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 rsqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getX() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getY() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getZ() ) )
-    );
-}
-
-inline const Vector3 absPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        fabsf4( vec.getX() ),
-        fabsf4( vec.getY() ),
-        fabsf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        copysignf4( vec0.getX(), vec1.getX() ),
-        copysignf4( vec0.getY(), vec1.getY() ),
-        copysignf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        fmaxf4( vec0.getX(), vec1.getX() ),
-        fmaxf4( vec0.getY(), vec1.getY() ),
-        fmaxf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline vec_float4 maxElem( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec.getX(), vec.getY() );
-    result = fmaxf4( vec.getZ(), result );
-    return result;
-}
-
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        fminf4( vec0.getX(), vec1.getX() ),
-        fminf4( vec0.getY(), vec1.getY() ),
-        fminf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline vec_float4 minElem( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = fminf4( vec.getX(), vec.getY() );
-    result = fminf4( vec.getZ(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = vec_add( vec.getX(), vec.getY() );
-    result = vec_add( result, vec.getZ() );
-    return result;
-}
-
-inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 lengthSqr( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = vec_madd( vec.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 length( const Vector3 & vec )
-{
-    return sqrtf4( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( const Vector3 & vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    return Vector3(
-        vec_madd( vec.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        vec_sub( vec_madd( vec0.getY(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getZ(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_madd( vec0.getZ(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getX(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_madd( vec0.getX(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getY(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 )
-{
-    return Vector3(
-        vec_sel( vec0.getX(), vec1.getX(), select1 ),
-        vec_sel( vec0.getY(), vec1.getY(), select1 ),
-        vec_sel( vec0.getZ(), vec1.getZ(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector3 & vec )
-{
-    Aos::Vector3 vec0, vec1, vec2, vec3;
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Vector3 & vec, const char * name )
-{
-    Aos::Vector3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-inline Vector4::Vector4( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-}
-
-inline Vector4::Vector4( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Vector4::Vector4( const Vector3 & xyz, vec_float4 _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Vector4::Vector4( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-}
-
-inline Vector4::Vector4( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-    mW = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
-}
-
-inline Vector4::Vector4( const Quat & quat )
-{
-    mX = quat.getX();
-    mY = quat.getY();
-    mZ = quat.getZ();
-    mW = quat.getW();
-}
-
-inline Vector4::Vector4( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline Vector4::Vector4( Aos::Vector4 vec )
-{
-    vec_float4 vec128 = vec.get128();
-    mX = vec_splat( vec128, 0 );
-    mY = vec_splat( vec128, 1 );
-    mZ = vec_splat( vec128, 2 );
-    mW = vec_splat( vec128, 3 );
-}
-
-inline Vector4::Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec0.get128(), vec2.get128() );
-    tmp1 = vec_mergeh( vec1.get128(), vec3.get128() );
-    tmp2 = vec_mergel( vec0.get128(), vec2.get128() );
-    tmp3 = vec_mergel( vec1.get128(), vec3.get128() );
-    mX = vec_mergeh( tmp0, tmp1 );
-    mY = vec_mergel( tmp0, tmp1 );
-    mZ = vec_mergeh( tmp2, tmp3 );
-    mW = vec_mergel( tmp2, tmp3 );
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
-{
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void Vector4::get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( mX, mZ );
-    tmp1 = vec_mergeh( mY, mW );
-    tmp2 = vec_mergel( mX, mZ );
-    tmp3 = vec_mergel( mY, mW );
-    result0 = Aos::Vector4( vec_mergeh( tmp0, tmp1 ) );
-    result1 = Aos::Vector4( vec_mergel( tmp0, tmp1 ) );
-    result2 = Aos::Vector4( vec_mergeh( tmp2, tmp3 ) );
-    result3 = Aos::Vector4( vec_mergel( tmp2, tmp3 ) );
-}
-
-inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads )
-{
-    Aos::Vector4 v0, v1, v2, v3;
-    vec.get4Aos( v0, v1, v2, v3 );
-    twoQuads[0] = _vmath2VfToHalfFloats(v0.get128(), v1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(v2.get128(), v3.get128());
-}
-
-inline Vector4 & Vector4::operator =( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Vector4 & Vector4::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Vector4::getX( ) const
-{
-    return mX;
-}
-
-inline Vector4 & Vector4::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Vector4::getY( ) const
-{
-    return mY;
-}
-
-inline Vector4 & Vector4::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Vector4::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector4 & Vector4::setW( vec_float4 _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline vec_float4 Vector4::getW( ) const
-{
-    return mW;
-}
-
-inline Vector4 & Vector4::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Vector4::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Vector4::vec_float4_t & Vector4::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Vector4::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
-{
-    return Vector4(
-        vec_add( mX, vec.mX ),
-        vec_add( mY, vec.mY ),
-        vec_add( mZ, vec.mZ ),
-        vec_add( mW, vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
-{
-    return Vector4(
-        vec_sub( mX, vec.mX ),
-        vec_sub( mY, vec.mY ),
-        vec_sub( mZ, vec.mZ ),
-        vec_sub( mW, vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator *( vec_float4 scalar ) const
-{
-    return Vector4(
-        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mW, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline Vector4 & Vector4::operator +=( const Vector4 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( const Vector4 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( vec_float4 scalar ) const
-{
-    return Vector4(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar ),
-        divf4( mW, scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ ),
-        negatef4( mW )
-    );
-}
-
-inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getW(), vec1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        divf4( vec0.getX(), vec1.getX() ),
-        divf4( vec0.getY(), vec1.getY() ),
-        divf4( vec0.getZ(), vec1.getZ() ),
-        divf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 recipPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getX() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getY() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getZ() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getW() )
-    );
-}
-
-inline const Vector4 sqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        sqrtf4( vec.getX() ),
-        sqrtf4( vec.getY() ),
-        sqrtf4( vec.getZ() ),
-        sqrtf4( vec.getW() )
-    );
-}
-
-inline const Vector4 rsqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getX() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getY() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getZ() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getW() ) )
-    );
-}
-
-inline const Vector4 absPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        fabsf4( vec.getX() ),
-        fabsf4( vec.getY() ),
-        fabsf4( vec.getZ() ),
-        fabsf4( vec.getW() )
-    );
-}
-
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        copysignf4( vec0.getX(), vec1.getX() ),
-        copysignf4( vec0.getY(), vec1.getY() ),
-        copysignf4( vec0.getZ(), vec1.getZ() ),
-        copysignf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        fmaxf4( vec0.getX(), vec1.getX() ),
-        fmaxf4( vec0.getY(), vec1.getY() ),
-        fmaxf4( vec0.getZ(), vec1.getZ() ),
-        fmaxf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline vec_float4 maxElem( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec.getX(), vec.getY() );
-    result = fmaxf4( vec.getZ(), result );
-    result = fmaxf4( vec.getW(), result );
-    return result;
-}
-
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        fminf4( vec0.getX(), vec1.getX() ),
-        fminf4( vec0.getY(), vec1.getY() ),
-        fminf4( vec0.getZ(), vec1.getZ() ),
-        fminf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline vec_float4 minElem( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = fminf4( vec.getX(), vec.getY() );
-    result = fminf4( vec.getZ(), result );
-    result = fminf4( vec.getW(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = vec_add( vec.getX(), vec.getY() );
-    result = vec_add( result, vec.getZ() );
-    result = vec_add( result, vec.getW() );
-    return result;
-}
-
-inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0.getW(), vec1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 lengthSqr( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = vec_madd( vec.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec.getW(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 length( const Vector4 & vec )
-{
-    return sqrtf4( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( const Vector4 & vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    return Vector4(
-        vec_madd( vec.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getW(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 )
-{
-    return Vector4(
-        vec_sel( vec0.getX(), vec1.getX(), select1 ),
-        vec_sel( vec0.getY(), vec1.getY(), select1 ),
-        vec_sel( vec0.getZ(), vec1.getZ(), select1 ),
-        vec_sel( vec0.getW(), vec1.getW(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector4 & vec )
-{
-    Aos::Vector4 vec0, vec1, vec2, vec3;
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Vector4 & vec, const char * name )
-{
-    Aos::Vector4 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-inline Point3::Point3( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-}
-
-inline Point3::Point3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Point3::Point3( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-}
-
-inline Point3::Point3( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline Point3::Point3( Aos::Point3 pnt )
-{
-    vec_float4 vec128 = pnt.get128();
-    mX = vec_splat( vec128, 0 );
-    mY = vec_splat( vec128, 1 );
-    mZ = vec_splat( vec128, 2 );
-}
-
-inline Point3::Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( pnt0.get128(), pnt2.get128() );
-    tmp1 = vec_mergeh( pnt1.get128(), pnt3.get128() );
-    tmp2 = vec_mergel( pnt0.get128(), pnt2.get128() );
-    tmp3 = vec_mergel( pnt1.get128(), pnt3.get128() );
-    mX = vec_mergeh( tmp0, tmp1 );
-    mY = vec_mergel( tmp0, tmp1 );
-    mZ = vec_mergeh( tmp2, tmp3 );
-}
-
-inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline void Point3::get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = vec_mergeh( mX, mZ );
-    tmp1 = vec_mergel( mX, mZ );
-    result0 = Aos::Point3( vec_mergeh( tmp0, mY ) );
-    result1 = Aos::Point3( vec_perm( tmp0, mY, _VECTORMATH_PERM_ZBWX ) );
-    result2 = Aos::Point3( vec_perm( tmp1, mY, _VECTORMATH_PERM_XCYX ) );
-    result3 = Aos::Point3( vec_perm( tmp1, mY, _VECTORMATH_PERM_ZDWX ) );
-}
-
-inline void loadXYZArray( Point3 & vec, const vec_float4 * threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = vec_sld( yzxy, xyzx, 8 );
-    zxzx = vec_sld( xyzx, zxyz, 8 );
-    yzyz = vec_sld( zxyz, yzxy, 8 );
-    vec.setX( vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
-    vec.setY( vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
-    vec.setZ( vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
-}
-
-inline void storeXYZArray( const Point3 & vec, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = vec_perm( vec.getX(), vec.getY(), _VECTORMATH_PERM_ZCXA );
-    zxzx = vec_perm( vec.getZ(), vec.getX(), _VECTORMATH_PERM_XBZD );
-    yzyz = vec_perm( vec.getY(), vec.getZ(), _VECTORMATH_PERM_WDYB );
-    xyzx = vec_sld( xyxy, zxzx, 8 );
-    yzxy = vec_sld( yzyz, xyxy, 8 );
-    zxyz = vec_sld( zxzx, yzyz, 8 );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( pnt0, xyz0 );
-    storeXYZArray( pnt1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Point3 & Point3::operator =( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-    return *this;
-}
-
-inline Point3 & Point3::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Point3::getX( ) const
-{
-    return mX;
-}
-
-inline Point3 & Point3::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Point3::getY( ) const
-{
-    return mY;
-}
-
-inline Point3 & Point3::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Point3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Point3 & Point3::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Point3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Point3::vec_float4_t & Point3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Point3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Point3::operator -( const Point3 & pnt ) const
-{
-    return Vector3(
-        vec_sub( mX, pnt.mX ),
-        vec_sub( mY, pnt.mY ),
-        vec_sub( mZ, pnt.mZ )
-    );
-}
-
-inline const Point3 Point3::operator +( const Vector3 & vec ) const
-{
-    return Point3(
-        vec_add( mX, vec.getX() ),
-        vec_add( mY, vec.getY() ),
-        vec_add( mZ, vec.getZ() )
-    );
-}
-
-inline const Point3 Point3::operator -( const Vector3 & vec ) const
-{
-    return Point3(
-        vec_sub( mX, vec.getX() ),
-        vec_sub( mY, vec.getY() ),
-        vec_sub( mZ, vec.getZ() )
-    );
-}
-
-inline Point3 & Point3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        vec_madd( pnt0.getX(), pnt1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( pnt0.getY(), pnt1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( pnt0.getZ(), pnt1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        divf4( pnt0.getX(), pnt1.getX() ),
-        divf4( pnt0.getY(), pnt1.getY() ),
-        divf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 recipPerElem( const Point3 & pnt )
-{
-    return Point3(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt.getX() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt.getY() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt.getZ() )
-    );
-}
-
-inline const Point3 sqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        sqrtf4( pnt.getX() ),
-        sqrtf4( pnt.getY() ),
-        sqrtf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 rsqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt.getX() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt.getY() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt.getZ() ) )
-    );
-}
-
-inline const Point3 absPerElem( const Point3 & pnt )
-{
-    return Point3(
-        fabsf4( pnt.getX() ),
-        fabsf4( pnt.getY() ),
-        fabsf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        copysignf4( pnt0.getX(), pnt1.getX() ),
-        copysignf4( pnt0.getY(), pnt1.getY() ),
-        copysignf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        fmaxf4( pnt0.getX(), pnt1.getX() ),
-        fmaxf4( pnt0.getY(), pnt1.getY() ),
-        fmaxf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline vec_float4 maxElem( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( pnt.getX(), pnt.getY() );
-    result = fmaxf4( pnt.getZ(), result );
-    return result;
-}
-
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        fminf4( pnt0.getX(), pnt1.getX() ),
-        fminf4( pnt0.getY(), pnt1.getY() ),
-        fminf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline vec_float4 minElem( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = fminf4( pnt.getX(), pnt.getY() );
-    result = fminf4( pnt.getZ(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = vec_add( pnt.getX(), pnt.getY() );
-    result = vec_add( result, pnt.getZ() );
-    return result;
-}
-
-inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec )
-{
-    vec_float4 result;
-    result = vec_madd( pnt.getX(), unitVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( pnt.getY(), unitVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( pnt.getZ(), unitVec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 distSqrFromOrigin( const Point3 & pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline vec_float4 distFromOrigin( const Point3 & pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 )
-{
-    return Point3(
-        vec_sel( pnt0.getX(), pnt1.getX(), select1 ),
-        vec_sel( pnt0.getY(), pnt1.getY(), select1 ),
-        vec_sel( pnt0.getZ(), pnt1.getZ(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Point3 & pnt )
-{
-    Aos::Point3 vec0, vec1, vec2, vec3;
-    pnt.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Point3 & pnt, const char * name )
-{
-    Aos::Point3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    pnt.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_CPP_H
+#define _VECTORMATH_VEC_SOA_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+// For permutes, the four words of the first operand are labeled [x,y,z,w] and those of the second operand [a,b,c,d].
+
+#define _VECTORMATH_PERM_X 0x00010203  // byte indices 0-3: word x of the first vec_perm operand
+#define _VECTORMATH_PERM_Y 0x04050607  // byte indices 4-7: word y
+#define _VECTORMATH_PERM_Z 0x08090a0b  // byte indices 8-11: word z
+#define _VECTORMATH_PERM_W 0x0c0d0e0f  // byte indices 12-15: word w
+#define _VECTORMATH_PERM_A 0x10111213  // byte indices 16-19: word a of the second vec_perm operand
+#define _VECTORMATH_PERM_B 0x14151617  // byte indices 20-23: word b
+#define _VECTORMATH_PERM_C 0x18191a1b  // byte indices 24-27: word c
+#define _VECTORMATH_PERM_D 0x1c1d1e1f  // byte indices 28-31: word d
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })  // composite masks: the four letters name the source word of output lanes 0..3
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
+#define _VECTORMATH_SLERP_TOL 0.999f  // cosine threshold compared against dot(unitVec0, unitVec1) in the slerp functions
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Soa {
+
+inline Vector3::Vector3( const Vector3 & vec )  // copy constructor: duplicates the three component vectors of vec
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+}
+
+inline Vector3::Vector3( vec_float4 _x, vec_float4 _y, vec_float4 _z )  // builds the vector from separate x, y and z component vectors
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+}
+
+inline Vector3::Vector3( const Point3 & pnt )  // converts a Point3 by copying its x, y and z components unchanged
+{
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+}
+
+inline Vector3::Vector3( vec_float4 scalar )  // sets x, y and z all to the same vec_float4 value
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+}
+
+inline Vector3::Vector3( Aos::Vector3 vec )  // replicates one AoS vector into every SIMD slot
+{
+    vec_float4 vec128 = vec.get128();
+    mX = vec_splat( vec128, 0 );  // element 0 copied to all four lanes
+    mY = vec_splat( vec128, 1 );  // element 1 copied to all four lanes
+    mZ = vec_splat( vec128, 2 );  // element 2 copied to all four lanes
+}
+
+inline Vector3::Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 )  // transposes four AoS vectors into SoA form: slot i receives vec<i>
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( vec0.get128(), vec2.get128() );  // elements 0,1 of vec0/vec2 interleaved: x0 x2 y0 y2
+    tmp1 = vec_mergeh( vec1.get128(), vec3.get128() );  // x1 x3 y1 y3
+    tmp2 = vec_mergel( vec0.get128(), vec2.get128() );  // elements 2,3 of vec0/vec2 interleaved: z0 z2 . .
+    tmp3 = vec_mergel( vec1.get128(), vec3.get128() );  // z1 z3 . .
+    mX = vec_mergeh( tmp0, tmp1 );  // x0 x1 x2 x3
+    mY = vec_mergel( tmp0, tmp1 );  // y0 y1 y2 y3
+    mZ = vec_mergeh( tmp2, tmp3 );  // z0 z1 z2 z3; the fourth elements of the inputs are never used
+}
+
+inline const Vector3 Vector3::xAxis( )  // returns (1,0,0) in every SIMD slot
+{
+    return Vector3( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+inline const Vector3 Vector3::yAxis( )  // returns (0,1,0) in every SIMD slot
+{
+    return Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+inline const Vector3 Vector3::zAxis( )  // returns (0,0,1) in every SIMD slot
+{
+    return Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 )  // linear interpolation with a separate t per slot: vec0 + (vec1 - vec0) * t
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );  // t is not clamped, so values outside [0,1] extrapolate
+}
+
+inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 )  // spherical linear interpolation, evaluated independently in each SIMD slot
+{  // inputs are named as unit vectors; nothing here normalizes or checks them
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );  // mask set in slots where cosAngle < _VECTORMATH_SLERP_TOL
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );  // computed for every slot; unmasked slots discard it in the vec_sel calls below
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );  // masked slots: sin((1-t)*angle)/sin(angle); other slots: 1-t
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );  // masked slots: sin(t*angle)/sin(angle); other slots: t
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline void Vector3::get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const  // extracts the four slots as separate AoS vectors (result<i> = slot i)
+{
+    vec_float4 tmp0, tmp1;
+    tmp0 = vec_mergeh( mX, mZ );  // x0 z0 x1 z1
+    tmp1 = vec_mergel( mX, mZ );  // x2 z2 x3 z3
+    result0 = Aos::Vector3( vec_mergeh( tmp0, mY ) );  // x0 y0 z0 y1 (fourth lane is a leftover value)
+    result1 = Aos::Vector3( vec_perm( tmp0, mY, _VECTORMATH_PERM_ZBWX ) );  // x1 y1 z1 x0
+    result2 = Aos::Vector3( vec_perm( tmp1, mY, _VECTORMATH_PERM_XCYX ) );  // x2 y2 z2 x2
+    result3 = Aos::Vector3( vec_perm( tmp1, mY, _VECTORMATH_PERM_ZDWX ) );  // x3 y3 z3 x2
+}
+
+inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads )  // loads four packed xyz triples (12 floats in three quads) into SoA form
+{
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];  // x0 y0 z0 x1
+    yzxy = threeQuads[1];  // y1 z1 x2 y2
+    zxyz = threeQuads[2];  // z2 x3 y3 z3
+    xyxy = vec_sld( yzxy, xyzx, 8 );  // x2 y2 x0 y0
+    zxzx = vec_sld( xyzx, zxyz, 8 );  // z0 x1 z2 x3
+    yzyz = vec_sld( zxyz, yzxy, 8 );  // y3 z3 y1 z1
+    vec.setX( vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );  // x0 x1 x2 x3
+    vec.setY( vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );  // y0 y1 y2 y3
+    vec.setZ( vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );  // z0 z1 z2 z3
+}
+
+inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads )  // stores the four slots as packed xyz triples (12 floats) into threeQuads[0..2]
+{
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = vec_perm( vec.getX(), vec.getY(), _VECTORMATH_PERM_ZCXA );  // x2 y2 x0 y0
+    zxzx = vec_perm( vec.getZ(), vec.getX(), _VECTORMATH_PERM_XBZD );  // z0 x1 z2 x3
+    yzyz = vec_perm( vec.getY(), vec.getZ(), _VECTORMATH_PERM_WDYB );  // y3 z3 y1 z1
+    xyzx = vec_sld( xyxy, zxzx, 8 );  // x0 y0 z0 x1
+    yzxy = vec_sld( yzyz, xyxy, 8 );  // y1 z1 x2 y2
+    zxyz = vec_sld( zxzx, yzyz, 8 );  // z2 x3 y3 z3
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads )  // packs the eight xyz triples of vec0 and vec1 into threeQuads[0..2]
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    storeXYZArray( vec0, xyz0 );  // vec0 as 12 packed floats
+    storeXYZArray( vec1, xyz1 );  // vec1 as 12 packed floats
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);  // helper is defined elsewhere; presumably converts two float quads to eight half floats - TODO confirm
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);  // this quad straddles the end of vec0's data and the start of vec1's
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Vector3 & Vector3::operator =( const Vector3 & vec )  // copies all three components and returns *this
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( vec_float4 _x )  // replaces the x component; returns *this so calls can be chained
+{
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Vector3::getX( ) const  // returns the x component (one value per SIMD slot)
+{
+    return mX;
+}
+
+inline Vector3 & Vector3::setY( vec_float4 _y )  // replaces the y component; returns *this so calls can be chained
+{
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Vector3::getY( ) const  // returns the y component (one value per SIMD slot)
+{
+    return mY;
+}
+
+inline Vector3 & Vector3::setZ( vec_float4 _z )  // replaces the z component; returns *this so calls can be chained
+{
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Vector3::getZ( ) const  // returns the z component (one value per SIMD slot)
+{
+    return mZ;
+}
+
+inline Vector3 & Vector3::setElem( int idx, vec_float4 value )  // sets component idx (0 = x, 1 = y, 2 = z) and returns *this
+{
+    *(&mX + idx) = value;  // treats mX, mY, mZ as a contiguous array; idx is not range-checked
+    return *this;
+}
+
+inline vec_float4 Vector3::getElem( int idx ) const  // returns component idx (0 = x, 1 = y, 2 = z)
+{
+    return *(&mX + idx);  // treats mX, mY, mZ as a contiguous array; idx is not range-checked
+}
+
+inline Vector3::vec_float4_t & Vector3::operator []( int idx )  // writable reference to component idx (0 = x, 1 = y, 2 = z)
+{
+    return *(&mX + idx);  // treats mX, mY, mZ as a contiguous array; idx is not range-checked
+}
+
+inline vec_float4 Vector3::operator []( int idx ) const  // read-only access: returns component idx by value
+{
+    return *(&mX + idx);  // treats mX, mY, mZ as a contiguous array; idx is not range-checked
+}
+
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const  // component-wise sum of two vectors
+{
+    return Vector3(
+        vec_add( mX, vec.mX ),
+        vec_add( mY, vec.mY ),
+        vec_add( mZ, vec.mZ )
+    );
+}
+
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const  // component-wise difference: *this - vec
+{
+    return Vector3(
+        vec_sub( mX, vec.mX ),
+        vec_sub( mY, vec.mY ),
+        vec_sub( mZ, vec.mZ )
+    );
+}
+
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const  // vector + point: component-wise sum returned as a Point3
+{
+    return Point3(
+        vec_add( mX, pnt.getX() ),
+        vec_add( mY, pnt.getY() ),
+        vec_add( mZ, pnt.getZ() )
+    );
+}
+
+inline const Vector3 Vector3::operator *( vec_float4 scalar ) const  // scales every component by scalar (one factor per SIMD slot)
+{
+    return Vector3(
+        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // multiply-add with a zero addend serves as a plain multiply
+        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline Vector3 & Vector3::operator +=( const Vector3 & vec )  // in-place add, implemented through operator + and assignment
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator -=( const Vector3 & vec )  // in-place subtract, implemented through operator - and assignment
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator *=( vec_float4 scalar )  // in-place scale, implemented through operator * and assignment
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator /( vec_float4 scalar ) const  // divides every component by scalar using divf4; no zero check is made here
+{
+    return Vector3(
+        divf4( mX, scalar ),
+        divf4( mY, scalar ),
+        divf4( mZ, scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator /=( vec_float4 scalar )  // in-place divide, implemented through operator / and assignment
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator -( ) const  // unary minus: applies negatef4 to each component
+{
+    return Vector3(
+        negatef4( mX ),
+        negatef4( mY ),
+        negatef4( mZ )
+    );
+}
+
+inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec )  // scalar * vector: forwards to Vector3::operator *( scalar )
+{
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )  // component-wise product (x0*x1, y0*y1, z0*z1)
+{
+    return Vector3(
+        vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // multiply-add with a zero addend serves as a plain multiply
+        vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )  // component-wise quotient vec0 / vec1 using divf4; no zero check is made here
+{
+    return Vector3(
+        divf4( vec0.getX(), vec1.getX() ),
+        divf4( vec0.getY(), vec1.getY() ),
+        divf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline const Vector3 recipPerElem( const Vector3 & vec )  // component-wise reciprocal, computed as divf4( 1, component )
+{
+    return Vector3(
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getX() ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getY() ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getZ() )
+    );
+}
+
+inline const Vector3 sqrtPerElem( const Vector3 & vec )  // applies sqrtf4 to each component
+{
+    return Vector3(
+        sqrtf4( vec.getX() ),
+        sqrtf4( vec.getY() ),
+        sqrtf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 rsqrtPerElem( const Vector3 & vec )  // component-wise reciprocal square root, computed as divf4( 1, sqrtf4( component ) )
+{
+    return Vector3(
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getX() ) ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getY() ) ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getZ() ) )
+    );
+}
+
+inline const Vector3 absPerElem( const Vector3 & vec )  // applies fabsf4 to each component
+{
+    return Vector3(
+        fabsf4( vec.getX() ),
+        fabsf4( vec.getY() ),
+        fabsf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )  // applies copysignf4( vec0 component, vec1 component ) to each component pair
+{  // presumably yields vec0's magnitude with vec1's sign, as with C copysign - confirm against the copysignf4 definition
+    return Vector3(
+        copysignf4( vec0.getX(), vec1.getX() ),
+        copysignf4( vec0.getY(), vec1.getY() ),
+        copysignf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )  // component-wise maximum of two vectors via fmaxf4
+{
+    return Vector3(
+        fmaxf4( vec0.getX(), vec1.getX() ),
+        fmaxf4( vec0.getY(), vec1.getY() ),
+        fmaxf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline vec_float4 maxElem( const Vector3 & vec )  // for each slot, the largest of that slot's x, y and z
+{
+    vec_float4 result;
+    result = fmaxf4( vec.getX(), vec.getY() );
+    result = fmaxf4( vec.getZ(), result );
+    return result;
+}
+
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )  // component-wise minimum of two vectors via fminf4
+{
+    return Vector3(
+        fminf4( vec0.getX(), vec1.getX() ),
+        fminf4( vec0.getY(), vec1.getY() ),
+        fminf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline vec_float4 minElem( const Vector3 & vec )  // for each slot, the smallest of that slot's x, y and z
+{
+    vec_float4 result;
+    result = fminf4( vec.getX(), vec.getY() );
+    result = fminf4( vec.getZ(), result );
+    return result;
+}
+
+inline vec_float4 sum( const Vector3 & vec )  // for each slot, x + y + z (added in that order)
+{
+    vec_float4 result;
+    result = vec_add( vec.getX(), vec.getY() );
+    result = vec_add( result, vec.getZ() );
+    return result;
+}
+
+inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 )  // per-slot dot product: x0*x1 + y0*y1 + z0*z1
+{
+    vec_float4 result;
+    result = vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  // each product is formed separately and then added, not fused into the running sum
+    result = vec_add( result, vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+inline vec_float4 lengthSqr( const Vector3 & vec )  // per-slot squared length: x*x + y*y + z*z
+{
+    vec_float4 result;
+    result = vec_madd( vec.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( vec.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( vec.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+inline vec_float4 length( const Vector3 & vec )  // per-slot length: sqrtf4 of lengthSqr( vec )
+{
+    return sqrtf4( lengthSqr( vec ) );
+}
+
+inline const Vector3 normalize( const Vector3 & vec )  // scales each slot's vector by 1 / length
+{
+    vec_float4 lenSqr, lenInv;
+    lenSqr = lengthSqr( vec );
+    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );  // no special handling of zero-length input is done here
+    return Vector3(
+        vec_madd( vec.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( vec.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( vec.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )  // per-slot cross product vec0 x vec1
+{
+    return Vector3(
+        vec_sub( vec_madd( vec0.getY(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getZ(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),  // x = y0*z1 - z0*y1
+        vec_sub( vec_madd( vec0.getZ(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getX(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),  // y = z0*x1 - x0*z1
+        vec_sub( vec_madd( vec0.getX(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getY(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )  // z = x0*y1 - y0*x1
+    );
+}
+
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 )  // bitwise choice between two vectors using one mask for all components
+{
+    return Vector3(
+        vec_sel( vec0.getX(), vec1.getX(), select1 ),  // takes vec1's bits where select1 bits are set, vec0's bits elsewhere
+        vec_sel( vec0.getY(), vec1.getY(), select1 ),
+        vec_sel( vec0.getZ(), vec1.getZ(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector3 & vec )  // debug helper (compiled only with _VECTORMATH_DEBUG): prints each of the four slots
+{
+    Aos::Vector3 vec0, vec1, vec2, vec3;
+    vec.get4Aos( vec0, vec1, vec2, vec3 );  // split into one AoS vector per slot
+    printf("slot 0:\n");
+    print( vec0 );  // the AoS print overload is declared elsewhere
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+inline void print( const Vector3 & vec, const char * name )  // debug helper: prints name as a heading, then each of the four slots
+{
+    Aos::Vector3 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vec.get4Aos( vec0, vec1, vec2, vec3 );  // split into one AoS vector per slot
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+#endif
+
+inline Vector4::Vector4( const Vector4 & vec )  // copy constructor: duplicates the four component vectors of vec
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+}
+
+inline Vector4::Vector4( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )  // builds the vector from separate x, y, z and w component vectors
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Vector4::Vector4( const Vector3 & xyz, vec_float4 _w )  // takes x, y, z from a Vector3 and w from a separate value
+{
+    this->setXYZ( xyz );
+    this->setW( _w );
+}
+
+inline Vector4::Vector4( const Vector3 & vec )  // extends a Vector3 to four components with w = 0
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+}
+
+inline Vector4::Vector4( const Point3 & pnt )  // extends a Point3 to four components with w = 1
+{
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+    mW = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+}
+
+inline Vector4::Vector4( const Quat & quat )  // copies the x, y, z and w components of a quaternion unchanged
+{
+    mX = quat.getX();
+    mY = quat.getY();
+    mZ = quat.getZ();
+    mW = quat.getW();
+}
+
+inline Vector4::Vector4( vec_float4 scalar )  // sets x, y, z and w all to the same vec_float4 value
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline Vector4::Vector4( Aos::Vector4 vec )  // replicates one AoS vector into every SIMD slot
+{
+    vec_float4 vec128 = vec.get128();
+    mX = vec_splat( vec128, 0 );  // element 0 copied to all four lanes
+    mY = vec_splat( vec128, 1 );  // element 1 copied to all four lanes
+    mZ = vec_splat( vec128, 2 );  // element 2 copied to all four lanes
+    mW = vec_splat( vec128, 3 );  // element 3 copied to all four lanes
+}
+
+inline Vector4::Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 )  // 4x4 transpose of four AoS vectors into SoA form: slot i receives vec<i>
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( vec0.get128(), vec2.get128() );  // x0 x2 y0 y2
+    tmp1 = vec_mergeh( vec1.get128(), vec3.get128() );  // x1 x3 y1 y3
+    tmp2 = vec_mergel( vec0.get128(), vec2.get128() );  // z0 z2 w0 w2
+    tmp3 = vec_mergel( vec1.get128(), vec3.get128() );  // z1 z3 w1 w3
+    mX = vec_mergeh( tmp0, tmp1 );  // x0 x1 x2 x3
+    mY = vec_mergel( tmp0, tmp1 );  // y0 y1 y2 y3
+    mZ = vec_mergeh( tmp2, tmp3 );  // z0 z1 z2 z3
+    mW = vec_mergel( tmp2, tmp3 );  // w0 w1 w2 w3
+}
+
+inline const Vector4 Vector4::xAxis( )  // returns (1,0,0,0) in every SIMD slot
+{
+    return Vector4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+inline const Vector4 Vector4::yAxis( )  // returns (0,1,0,0) in every SIMD slot
+{
+    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+inline const Vector4 Vector4::zAxis( )  // returns (0,0,1,0) in every SIMD slot
+{
+    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+inline const Vector4 Vector4::wAxis( )  // returns (0,0,0,1) in every SIMD slot
+{
+    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 )  // linear interpolation with a separate t per slot: vec0 + (vec1 - vec0) * t
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );  // t is not clamped, so values outside [0,1] extrapolate
+}
+
+inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 )  // spherical linear interpolation of 4-component vectors, evaluated independently in each SIMD slot
+{  // inputs are named as unit vectors; nothing here normalizes or checks them
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );  // mask set in slots where cosAngle < _VECTORMATH_SLERP_TOL
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );  // computed for every slot; unmasked slots discard it in the vec_sel calls below
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );  // masked slots: sin((1-t)*angle)/sin(angle); other slots: 1-t
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );  // masked slots: sin(t*angle)/sin(angle); other slots: t
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline void Vector4::get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const  // 4x4 transpose back to AoS: result<i> = slot i as (x, y, z, w)
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( mX, mZ );  // x0 z0 x1 z1
+    tmp1 = vec_mergeh( mY, mW );  // y0 w0 y1 w1
+    tmp2 = vec_mergel( mX, mZ );  // x2 z2 x3 z3
+    tmp3 = vec_mergel( mY, mW );  // y2 w2 y3 w3
+    result0 = Aos::Vector4( vec_mergeh( tmp0, tmp1 ) );  // x0 y0 z0 w0
+    result1 = Aos::Vector4( vec_mergel( tmp0, tmp1 ) );  // x1 y1 z1 w1
+    result2 = Aos::Vector4( vec_mergeh( tmp2, tmp3 ) );  // x2 y2 z2 w2
+    result3 = Aos::Vector4( vec_mergel( tmp2, tmp3 ) );  // x3 y3 z3 w3
+}
+
+inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads )  // packs the four slots of vec, in AoS order, into twoQuads[0..1]
+{
+    Aos::Vector4 v0, v1, v2, v3;
+    vec.get4Aos( v0, v1, v2, v3 );  // one AoS vector per slot
+    twoQuads[0] = _vmath2VfToHalfFloats(v0.get128(), v1.get128());  // helper is defined elsewhere; presumably converts two float quads to eight half floats - TODO confirm
+    twoQuads[1] = _vmath2VfToHalfFloats(v2.get128(), v3.get128());
+}
+
+inline Vector4 & Vector4::operator =( const Vector4 & vec )  // copies all four components and returns *this
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec )  // overwrites x, y and z from vec; w is left unchanged
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const  // returns the x, y and z components as a Vector3 (w is dropped)
+{
+    return Vector3( mX, mY, mZ );
+}
+
+inline Vector4 & Vector4::setX( vec_float4 _x )  // replaces the x component; returns *this so calls can be chained
+{
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Vector4::getX( ) const  // returns the x component (one value per SIMD slot)
+{
+    return mX;
+}
+
+inline Vector4 & Vector4::setY( vec_float4 _y )  // replaces the y component; returns *this so calls can be chained
+{
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Vector4::getY( ) const  // returns the y component (one value per SIMD slot)
+{
+    return mY;
+}
+
+inline Vector4 & Vector4::setZ( vec_float4 _z )  // replaces the z component; returns *this so calls can be chained
+{
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Vector4::getZ( ) const  // returns the z component (one value per SIMD slot)
+{
+    return mZ;
+}
+
+inline Vector4 & Vector4::setW( vec_float4 _w )  // replaces the w component; returns *this so calls can be chained
+{
+    mW = _w;
+    return *this;
+}
+
+inline vec_float4 Vector4::getW( ) const  // returns the w component (one value per SIMD slot)
+{
+    return mW;
+}
+
+inline Vector4 & Vector4::setElem( int idx, vec_float4 value )  // sets component idx (0 = x, 1 = y, 2 = z, 3 = w) and returns *this
+{
+    *(&mX + idx) = value;  // treats mX..mW as a contiguous array; idx is not range-checked
+    return *this;
+}
+
+inline vec_float4 Vector4::getElem( int idx ) const  // returns component idx (0 = x, 1 = y, 2 = z, 3 = w)
+{
+    return *(&mX + idx);  // treats mX..mW as a contiguous array; idx is not range-checked
+}
+
+inline Vector4::vec_float4_t & Vector4::operator []( int idx )  // writable reference to component idx (0 = x, 1 = y, 2 = z, 3 = w)
+{
+    return *(&mX + idx);  // treats mX..mW as a contiguous array; idx is not range-checked
+}
+
+inline vec_float4 Vector4::operator []( int idx ) const  // read-only access: returns component idx by value
+{
+    return *(&mX + idx);  // treats mX..mW as a contiguous array; idx is not range-checked
+}
+
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const  // component-wise sum of two vectors
+{
+    return Vector4(
+        vec_add( mX, vec.mX ),
+        vec_add( mY, vec.mY ),
+        vec_add( mZ, vec.mZ ),
+        vec_add( mW, vec.mW )
+    );
+}
+
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const  // component-wise difference: *this - vec
+{
+    return Vector4(
+        vec_sub( mX, vec.mX ),
+        vec_sub( mY, vec.mY ),
+        vec_sub( mZ, vec.mZ ),
+        vec_sub( mW, vec.mW )
+    );
+}
+
+inline const Vector4 Vector4::operator *( vec_float4 scalar ) const  // scales every component by scalar (one factor per SIMD slot)
+{
+    return Vector4(
+        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // multiply-add with a zero addend serves as a plain multiply
+        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mW, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline Vector4 & Vector4::operator +=( const Vector4 & vec )  // in-place add, implemented through operator + and assignment
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator -=( const Vector4 & vec )  // in-place subtract, implemented through operator - and assignment
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( vec_float4 scalar )  // in-place scale, implemented through operator * and assignment
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator /( vec_float4 scalar ) const  // divides every component by scalar using divf4; no zero check is made here
+{
+    return Vector4(
+        divf4( mX, scalar ),
+        divf4( mY, scalar ),
+        divf4( mZ, scalar ),
+        divf4( mW, scalar )
+    );
+}
+
+inline Vector4 & Vector4::operator /=( vec_float4 scalar )  // in-place divide, implemented through operator / and assignment
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator -( ) const  // unary minus: applies negatef4 to each component
+{
+    return Vector4(
+        negatef4( mX ),
+        negatef4( mY ),
+        negatef4( mZ ),
+        negatef4( mW )
+    );
+}
+
+inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec )  // scalar * vector: forwards to Vector4::operator *( scalar )
+{
+    return vec * scalar;
+}
+
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )  // component-wise product of two 4-component vectors
+{
+    return Vector4(
+        vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),  // multiply-add with a zero addend serves as a plain multiply
+        vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( vec0.getW(), vec1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )  // component-wise quotient vec0 / vec1 using divf4; no zero check is made here
+{
+    return Vector4(
+        divf4( vec0.getX(), vec1.getX() ),
+        divf4( vec0.getY(), vec1.getY() ),
+        divf4( vec0.getZ(), vec1.getZ() ),
+        divf4( vec0.getW(), vec1.getW() )
+    );
+}
+
+inline const Vector4 recipPerElem( const Vector4 & vec )  // component-wise reciprocal, computed as divf4( 1, component )
+{
+    return Vector4(
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getX() ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getY() ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getZ() ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getW() )
+    );
+}
+
+inline const Vector4 sqrtPerElem( const Vector4 & vec )  // applies sqrtf4 to each component
+{
+    return Vector4(
+        sqrtf4( vec.getX() ),
+        sqrtf4( vec.getY() ),
+        sqrtf4( vec.getZ() ),
+        sqrtf4( vec.getW() )
+    );
+}
+
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )  // component-wise reciprocal square root, computed as divf4( 1, sqrtf4( component ) )
+{
+    return Vector4(
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getX() ) ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getY() ) ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getZ() ) ),
+        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getW() ) )
+    );
+}
+
+inline const Vector4 absPerElem( const Vector4 & vec )  // applies fabsf4 to each component
+{
+    return Vector4(
+        fabsf4( vec.getX() ),
+        fabsf4( vec.getY() ),
+        fabsf4( vec.getZ() ),
+        fabsf4( vec.getW() )
+    );
+}
+
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )  // applies copysignf4( vec0 component, vec1 component ) to each component pair
+{  // presumably yields vec0's magnitude with vec1's sign, as with C copysign - confirm against the copysignf4 definition
+    return Vector4(
+        copysignf4( vec0.getX(), vec1.getX() ),
+        copysignf4( vec0.getY(), vec1.getY() ),
+        copysignf4( vec0.getZ(), vec1.getZ() ),
+        copysignf4( vec0.getW(), vec1.getW() )
+    );
+}
+
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )  // component-wise maximum of two vectors via fmaxf4
+{
+    return Vector4(
+        fmaxf4( vec0.getX(), vec1.getX() ),
+        fmaxf4( vec0.getY(), vec1.getY() ),
+        fmaxf4( vec0.getZ(), vec1.getZ() ),
+        fmaxf4( vec0.getW(), vec1.getW() )
+    );
+}
+
+inline vec_float4 maxElem( const Vector4 & vec )  // for each slot, the largest of that slot's x, y, z and w
+{
+    vec_float4 result;
+    result = fmaxf4( vec.getX(), vec.getY() );
+    result = fmaxf4( vec.getZ(), result );
+    result = fmaxf4( vec.getW(), result );
+    return result;
+}
+
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )  // component-wise minimum of two vectors via fminf4
+{
+    return Vector4(
+        fminf4( vec0.getX(), vec1.getX() ),
+        fminf4( vec0.getY(), vec1.getY() ),
+        fminf4( vec0.getZ(), vec1.getZ() ),
+        fminf4( vec0.getW(), vec1.getW() )
+    );
+}
+
+inline vec_float4 minElem( const Vector4 & vec )  // for each slot, the smallest of that slot's x, y, z and w
+{
+    vec_float4 result;
+    result = fminf4( vec.getX(), vec.getY() );
+    result = fminf4( vec.getZ(), result );
+    result = fminf4( vec.getW(), result );
+    return result;
+}
+
+inline vec_float4 sum( const Vector4 & vec )  // for each slot, x + y + z + w (added in that order)
+{
+    vec_float4 result;
+    result = vec_add( vec.getX(), vec.getY() );
+    result = vec_add( result, vec.getZ() );
+    result = vec_add( result, vec.getW() );
+    return result;
+}
+
+inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 )  // per-slot dot product: x0*x1 + y0*y1 + z0*z1 + w0*w1
+{
+    vec_float4 result;
+    result = vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );  // each product is formed separately and then added, not fused into the running sum
+    result = vec_add( result, vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( vec0.getW(), vec1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+inline vec_float4 lengthSqr( const Vector4 & vec )  // per-slot squared length: x*x + y*y + z*z + w*w
+{
+    vec_float4 result;
+    result = vec_madd( vec.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( vec.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( vec.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( vec.getW(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+inline vec_float4 length( const Vector4 & vec )  // per-slot length: sqrtf4 of lengthSqr( vec )
+{
+    return sqrtf4( lengthSqr( vec ) );
+}
+
+inline const Vector4 normalize( const Vector4 & vec )  // scales each slot's 4-component vector by 1 / length
+{
+    vec_float4 lenSqr, lenInv;
+    lenSqr = lengthSqr( vec );
+    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );  // no special handling of zero-length input is done here
+    return Vector4(
+        vec_madd( vec.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( vec.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( vec.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( vec.getW(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 )  // bitwise choice between two vectors using one mask for all components
+{
+    return Vector4(
+        vec_sel( vec0.getX(), vec1.getX(), select1 ),  // takes vec1's bits where select1 bits are set, vec0's bits elsewhere
+        vec_sel( vec0.getY(), vec1.getY(), select1 ),
+        vec_sel( vec0.getZ(), vec1.getZ(), select1 ),
+        vec_sel( vec0.getW(), vec1.getW(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector4 & vec )  // debug helper (compiled only with _VECTORMATH_DEBUG): prints each of the four slots
+{
+    Aos::Vector4 vec0, vec1, vec2, vec3;
+    vec.get4Aos( vec0, vec1, vec2, vec3 );  // split into one AoS vector per slot
+    printf("slot 0:\n");
+    print( vec0 );  // the AoS print overload is declared elsewhere
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+inline void print( const Vector4 & vec, const char * name )  // debug helper: prints name as a heading, then each of the four slots
+{
+    Aos::Vector4 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vec.get4Aos( vec0, vec1, vec2, vec3 );  // split into one AoS vector per slot
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+#endif
+
+// Copy constructor: duplicates the x, y and z quadwords of pnt.
+inline Point3::Point3( const Point3 & pnt )
+    : mX( pnt.mX ),
+      mY( pnt.mY ),
+      mZ( pnt.mZ )
+{ }
+
+// Build an SoA point directly from its x, y and z quadwords.
+inline Point3::Point3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+    : mX( _x ),
+      mY( _y ),
+      mZ( _z )
+{ }
+
+// Build an SoA point from the x, y and z quadwords of an SoA vector.
+inline Point3::Point3( const Vector3 & vec )
+    : mX( vec.getX() ),
+      mY( vec.getY() ),
+      mZ( vec.getZ() )
+{ }
+
+// Set x, y and z all to the same quadword.
+inline Point3::Point3( vec_float4 scalar )
+    : mX( scalar ),
+      mY( scalar ),
+      mZ( scalar )
+{ }
+
+// Replicate one AoS point into every slot: elements 0, 1 and 2 of its
+// 128-bit value are splatted into mX, mY and mZ respectively.
+inline Point3::Point3( Aos::Point3 pnt )
+    : mX( vec_splat( pnt.get128(), 0 ) ),
+      mY( vec_splat( pnt.get128(), 1 ) ),
+      mZ( vec_splat( pnt.get128(), 2 ) )
+{ }
+
+inline Point3::Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 )
+{   // Transposes four AoS points into SoA form: slot i of mX/mY/mZ receives elements 0/1/2 of pnt<i> (written x/y/z below).
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( pnt0.get128(), pnt2.get128() );  // { x0, x2, y0, y2 }
+    tmp1 = vec_mergeh( pnt1.get128(), pnt3.get128() );  // { x1, x3, y1, y3 }
+    tmp2 = vec_mergel( pnt0.get128(), pnt2.get128() );  // { z0, z2, e0, e2 } where e is the fourth element of each input
+    tmp3 = vec_mergel( pnt1.get128(), pnt3.get128() );  // { z1, z3, e1, e3 }
+    mX = vec_mergeh( tmp0, tmp1 );  // { x0, x1, x2, x3 }
+    mY = vec_mergel( tmp0, tmp1 );  // { y0, y1, y2, y3 }
+    mZ = vec_mergeh( tmp2, tmp3 );  // { z0, z1, z2, z3 }; the fourth elements are never used
+}
+
+// Per-slot linear interpolation pnt0 + ( pnt1 - pnt0 ) * t; t is not clamped here.
+inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 ) {
+    return pnt0 + ( ( pnt1 - pnt0 ) * t );
+}
+
+inline void Point3::get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const
+{   // Splits this SoA point into four AoS points, one per slot, written to result0..result3.
+    vec_float4 tmp0, tmp1;
+    tmp0 = vec_mergeh( mX, mZ );  // { x0, z0, x1, z1 }
+    tmp1 = vec_mergel( mX, mZ );  // { x2, z2, x3, z3 }
+    result0 = Aos::Point3( vec_mergeh( tmp0, mY ) );  // { x0, y0, z0, y1 }; the fourth element is a leftover
+    result1 = Aos::Point3( vec_perm( tmp0, mY, _VECTORMATH_PERM_ZBWX ) );  // presumably { x1, y1, z1, x0 } - assumes X..W name elements of the first operand and A..D of the second; confirm against the macro definitions
+    result2 = Aos::Point3( vec_perm( tmp1, mY, _VECTORMATH_PERM_XCYX ) );  // presumably { x2, y2, z2, x2 } under the same assumption
+    result3 = Aos::Point3( vec_perm( tmp1, mY, _VECTORMATH_PERM_ZDWX ) );  // presumably { x3, y3, z3, x2 } under the same assumption
+}
+
+inline void loadXYZArray( Point3 & vec, const vec_float4 * threeQuads )
+{   // Reads threeQuads[0..2] and regroups their twelve floats into the x, y and z quadwords of vec.
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];  // presumably { x0, y0, z0, x1 } - layout inferred from the local names; confirm with callers
+    yzxy = threeQuads[1];  // presumably { y1, z1, x2, y2 }
+    zxyz = threeQuads[2];  // presumably { z2, x3, y3, z3 }
+    xyxy = vec_sld( yzxy, xyzx, 8 );  // words 2-3 of yzxy then words 0-1 of xyzx: { x2, y2, x0, y0 } under that layout
+    zxzx = vec_sld( xyzx, zxyz, 8 );  // words 2-3 of xyzx then words 0-1 of zxyz: { z0, x1, z2, x3 }
+    yzyz = vec_sld( zxyz, yzxy, 8 );  // words 2-3 of zxyz then words 0-1 of yzxy: { y3, z3, y1, z1 }
+    vec.setX( vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );  // presumably { x0, x1, x2, x3 } - assumes X..W index the first operand and A..D the second; confirm against the macro definitions
+    vec.setY( vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );  // presumably { y0, y1, y2, y3 } under the same assumption
+    vec.setZ( vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );  // presumably { z0, z1, z2, z3 } under the same assumption
+}
+
+inline void storeXYZArray( const Point3 & vec, vec_float4 * threeQuads )
+{   // Regroups the x, y and z quadwords of vec into three quadwords and writes them to threeQuads[0..2].
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = vec_perm( vec.getX(), vec.getY(), _VECTORMATH_PERM_ZCXA );  // presumably { x2, y2, x0, y0 } - assumes X..W index the first operand and A..D the second; confirm against the macro definitions
+    zxzx = vec_perm( vec.getZ(), vec.getX(), _VECTORMATH_PERM_XBZD );  // presumably { z0, x1, z2, x3 } under the same assumption
+    yzyz = vec_perm( vec.getY(), vec.getZ(), _VECTORMATH_PERM_WDYB );  // presumably { y3, z3, y1, z1 } under the same assumption
+    xyzx = vec_sld( xyxy, zxzx, 8 );  // words 2-3 of xyxy then words 0-1 of zxzx: { x0, y0, z0, x1 } given the values above
+    yzxy = vec_sld( yzyz, xyxy, 8 );  // words 2-3 of yzyz then words 0-1 of xyxy: { y1, z1, x2, y2 }
+    zxyz = vec_sld( zxzx, yzyz, 8 );  // words 2-3 of zxzx then words 0-1 of yzyz: { z2, x3, y3, z3 }
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+// Store two SoA points as half-floats: each point is first written as three
+// quadwords by storeXYZArray, then the six quadwords are converted in pairs.
+inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads )
+{
+    vec_float4 packed[6];
+    storeXYZArray( pnt0, packed );
+    storeXYZArray( pnt1, packed + 3 );
+    for ( int q = 0; q < 3; ++q )
+        threeQuads[q] = _vmath2VfToHalfFloats( packed[2 * q], packed[2 * q + 1] );
+}
+
+// Assignment: copies the x, y and z quadwords of pnt into this point and
+// returns *this.
+inline Point3 & Point3::operator =( const Point3 & pnt )
+{
+    // Each setter returns *this, so the three copies chain into one expression.
+    return setX( pnt.mX ).setY( pnt.mY ).setZ( pnt.mZ );
+}
+
+// Replace the x quadword (the x values of all four slots); returns *this.
+inline Point3 & Point3::setX( vec_float4 _x ) {
+    mX = _x;
+    return *this;
+}
+
+// Return the x quadword, i.e. the x values of all four slots.
+inline vec_float4 Point3::getX( ) const {
+    return mX;
+}
+
+// Replace the y quadword (the y values of all four slots); returns *this.
+inline Point3 & Point3::setY( vec_float4 _y ) {
+    mY = _y;
+    return *this;
+}
+
+// Return the y quadword, i.e. the y values of all four slots.
+inline vec_float4 Point3::getY( ) const {
+    return mY;
+}
+
+// Replace the z quadword (the z values of all four slots); returns *this.
+inline Point3 & Point3::setZ( vec_float4 _z ) {
+    mZ = _z;
+    return *this;
+}
+
+// Return the z quadword, i.e. the z values of all four slots.
+inline vec_float4 Point3::getZ( ) const {
+    return mZ;
+}
+
+// Store value into the quadword idx positions after mX (idx is not range-checked); returns *this.
+inline Point3 & Point3::setElem( int idx, vec_float4 value ) {
+    (&mX)[idx] = value;
+    return *this;
+}
+
+// Read the quadword idx positions after mX (idx is not range-checked).
+inline vec_float4 Point3::getElem( int idx ) const {
+    return (&mX)[idx];
+}
+
+// Mutable access to the quadword idx positions after mX (idx is not range-checked).
+inline Point3::vec_float4_t & Point3::operator []( int idx ) {
+    return (&mX)[idx];
+}
+
+// Read-only access by value to the quadword idx positions after mX (idx is not range-checked).
+inline vec_float4 Point3::operator []( int idx ) const {
+    return (&mX)[idx];
+}
+
+// Point minus point: the per-slot Vector3 obtained by subtracting pnt from this point.
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+    const vec_float4 dx = vec_sub( mX, pnt.mX );
+    const vec_float4 dy = vec_sub( mY, pnt.mY );
+    const vec_float4 dz = vec_sub( mZ, pnt.mZ );
+    return Vector3( dx, dy, dz );
+}
+
+// Point plus vector: this point translated by vec in each slot.
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+    const vec_float4 px = vec_add( mX, vec.getX() );
+    const vec_float4 py = vec_add( mY, vec.getY() );
+    const vec_float4 pz = vec_add( mZ, vec.getZ() );
+    return Point3( px, py, pz );
+}
+
+// Point minus vector: this point translated by the negative of vec in each slot.
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+    const vec_float4 px = vec_sub( mX, vec.getX() );
+    const vec_float4 py = vec_sub( mY, vec.getY() );
+    const vec_float4 pz = vec_sub( mZ, vec.getZ() );
+    return Point3( px, py, pz );
+}
+
+// Add vec to this point in place, using operator + and assignment; returns *this.
+inline Point3 & Point3::operator +=( const Vector3 & vec )
+{
+    return ( *this = *this + vec );
+}
+
+// Subtract vec from this point in place, using operator - and assignment; returns *this.
+inline Point3 & Point3::operator -=( const Vector3 & vec )
+{
+    return ( *this = *this - vec );
+}
+
+// Component-wise product of two SoA points (vec_madd with a zero addend).
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 px = vec_madd( pnt0.getX(), pnt1.getX(), zero );
+    const vec_float4 py = vec_madd( pnt0.getY(), pnt1.getY(), zero );
+    return Point3( px, py, vec_madd( pnt0.getZ(), pnt1.getZ(), zero ) );
+}
+
+// Component-wise quotient pnt0 / pnt1, computed with divf4 on x, y and z.
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 qx = divf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 qy = divf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 qz = divf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( qx, qy, qz );
+}
+
+// Component-wise reciprocal, evaluated as divf4( 1, component ).
+inline const Point3 recipPerElem( const Point3 & pnt )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 rx = divf4( one, pnt.getX() );
+    const vec_float4 ry = divf4( one, pnt.getY() );
+    return Point3( rx, ry, divf4( one, pnt.getZ() ) );
+}
+
+// Component-wise square root, computed with sqrtf4 on x, y and z.
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+    const vec_float4 sx = sqrtf4( pnt.getX() );
+    const vec_float4 sy = sqrtf4( pnt.getY() );
+    const vec_float4 sz = sqrtf4( pnt.getZ() );
+    return Point3( sx, sy, sz );
+}
+
+// Component-wise reciprocal square root, evaluated as divf4( 1, sqrtf4( component ) ).
+inline const Point3 rsqrtPerElem( const Point3 & pnt )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 rx = divf4( one, sqrtf4( pnt.getX() ) );
+    const vec_float4 ry = divf4( one, sqrtf4( pnt.getY() ) );
+    return Point3( rx, ry, divf4( one, sqrtf4( pnt.getZ() ) ) );
+}
+
+// Component-wise absolute value, computed with fabsf4 on x, y and z.
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+    const vec_float4 ax = fabsf4( pnt.getX() );
+    const vec_float4 ay = fabsf4( pnt.getY() );
+    const vec_float4 az = fabsf4( pnt.getZ() );
+    return Point3( ax, ay, az );
+}
+
+// Applies copysignf4( pnt0 component, pnt1 component ) to x, y and z.
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 cx = copysignf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 cy = copysignf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 cz = copysignf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( cx, cy, cz );
+}
+
+// Component-wise maximum of two SoA points, computed with fmaxf4.
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 mx = fmaxf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 my = fmaxf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 mz = fmaxf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( mx, my, mz );
+}
+
+// Largest of the x, y and z components in each slot.
+inline vec_float4 maxElem( const Point3 & pnt )
+{
+    const vec_float4 maxXY = fmaxf4( pnt.getX(), pnt.getY() );
+    // z is folded in last, as the first operand of the second fmaxf4.
+    return fmaxf4( pnt.getZ(), maxXY );
+}
+
+// Component-wise minimum of two SoA points, computed with fminf4.
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 mx = fminf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 my = fminf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 mz = fminf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( mx, my, mz );
+}
+
+// Smallest of the x, y and z components in each slot.
+inline vec_float4 minElem( const Point3 & pnt )
+{
+    const vec_float4 minXY = fminf4( pnt.getX(), pnt.getY() );
+    // z is folded in last, as the first operand of the second fminf4.
+    return fminf4( pnt.getZ(), minXY );
+}
+
+// Sum x + y + z in each slot (x and y are added first, then z).
+inline vec_float4 sum( const Point3 & pnt )
+{
+    const vec_float4 xy = vec_add( pnt.getX(), pnt.getY() );
+    const vec_float4 xyz = vec_add( xy, pnt.getZ() );
+    return xyz;
+}
+
+// Uniform scale: mulPerElem with a point whose x, y and z are all scaleVal.
+inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal ) {
+    return mulPerElem( pnt, Point3( scaleVal ) );
+}
+
+// Non-uniform scale: mulPerElem with a point built from the components of scaleVec.
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ) {
+    return mulPerElem( pnt, Point3( scaleVec ) );
+}
+
+// Per-slot dot product of the point's coordinates with unitVec: px*ux + py*uy + pz*uz.
+inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 acc = vec_madd( pnt.getX(), unitVec.getX(), zero );
+    acc = vec_add( acc, vec_madd( pnt.getY(), unitVec.getY(), zero ) );
+    return vec_add( acc, vec_madd( pnt.getZ(), unitVec.getZ(), zero ) );
+}
+
+// Squared distance from the origin: lengthSqr of the point converted to a Vector3.
+inline vec_float4 distSqrFromOrigin( const Point3 & pnt ) {
+    return lengthSqr( Vector3( pnt ) );
+}
+
+// Distance from the origin: length of the point converted to a Vector3.
+inline vec_float4 distFromOrigin( const Point3 & pnt ) {
+    return length( Vector3( pnt ) );
+}
+
+// Squared distance between two points: lengthSqr of the difference pnt1 - pnt0.
+inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 ) {
+    return lengthSqr( pnt1 - pnt0 );
+}
+
+// Distance between two points: length of the difference pnt1 - pnt0.
+inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 ) {
+    return length( pnt1 - pnt0 );
+}
+
+// Blend two SoA points with vec_sel: set bits in select1 take pnt1, clear bits take pnt0.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 )
+{
+    const vec_float4 sx = vec_sel( pnt0.getX(), pnt1.getX(), select1 );
+    const vec_float4 sy = vec_sel( pnt0.getY(), pnt1.getY(), select1 );
+    const vec_float4 sz = vec_sel( pnt0.getZ(), pnt1.getZ(), select1 );
+    return Point3( sx, sy, sz );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the four AoS points held in the slots of pnt, each
+// under its own "slot N:" heading.
+inline void print( const Point3 & pnt )
+{
+    // get4Aos splits the SoA value into one Aos::Point3 per slot.
+    Aos::Point3 slot0, slot1, slot2, slot3;
+    pnt.get4Aos( slot0, slot1, slot2, slot3 );
+
+    printf("slot 0:\n"); print( slot0 );
+    printf("slot 1:\n"); print( slot1 );
+    printf("slot 2:\n"); print( slot2 );
+    printf("slot 3:\n"); print( slot3 );
+}
+
+// Debug helper: prints name followed by ':' and then the four AoS points
+// held in the slots of pnt, each under its own "slot N:" heading.
+inline void print( const Point3 & pnt, const char * name )
+{
+    Aos::Point3 slot0, slot1, slot2, slot3;
+
+    printf( "%s:\n", name );
+    pnt.get4Aos( slot0, slot1, slot2, slot3 );
+
+    printf("slot 0:\n"); print( slot0 );
+    printf("slot 1:\n"); print( slot1 );
+    printf("slot 2:\n"); print( slot2 );
+    printf("slot 3:\n"); print( slot3 );
+}
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vecidx_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vecidx_aos.h
index 86ddf84fd..df3357570 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vecidx_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vecidx_aos.h
@@ -1,80 +1,80 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VECIDX_AOS_H
-#define _VECTORMATH_VECIDX_AOS_H
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// VecIdx 
-// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
-// subscripting operator.
-//
-
-class VecIdx
-{
-private:
-    typedef vec_float4 vec_float4_t;
-    vec_float4_t &ref __attribute__ ((aligned(16)));
-    int i __attribute__ ((aligned(16)));
-public:
-    inline VecIdx( vec_float4_t& vec, int idx ): ref(vec) { i = idx; }
-
-    // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined
-    // in which case, implicitly casts to floatInVec, and one must call
-    // getAsFloat to convert to float.
-    //
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-    inline operator floatInVec() const;
-    inline float getAsFloat() const;
-#else
-    inline operator float() const;
-#endif
-
-    inline float operator =( float scalar );
-    inline floatInVec operator =( floatInVec scalar );
-    inline floatInVec operator =( const VecIdx& scalar );
-    inline floatInVec operator *=( float scalar );
-    inline floatInVec operator *=( floatInVec scalar );
-    inline floatInVec operator /=( float scalar );
-    inline floatInVec operator /=( floatInVec scalar );
-    inline floatInVec operator +=( float scalar );
-    inline floatInVec operator +=( floatInVec scalar );
-    inline floatInVec operator -=( float scalar );
-    inline floatInVec operator -=( floatInVec scalar );
-};
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VECIDX_AOS_H
+#define _VECTORMATH_VECIDX_AOS_H
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// VecIdx 
+// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
+// subscripting operator.
+//
+
+class VecIdx
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4_t &ref __attribute__ ((aligned(16)));   // the quadword passed to the constructor
+    int i __attribute__ ((aligned(16)));               // the element index passed to the constructor
+public:
+    // Bind to element idx of vec; a reference to vec is kept, not a copy.
+    inline VecIdx( vec_float4_t& vec, int idx ): ref(vec), i(idx) { }
+
+    // Reading: converts implicitly to float.  When _VECTORMATH_NO_SCALAR_CAST
+    // is defined the implicit conversion yields floatInVec instead, and
+    // getAsFloat must be called to obtain a plain float.
+    //
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+    inline operator floatInVec() const;
+    inline float getAsFloat() const;
+#else
+    inline operator float() const;
+#endif
+
+    // Writing: plain and compound assignment, each for float and floatInVec.
+    inline float operator =( float scalar );
+    inline floatInVec operator =( floatInVec scalar );
+    inline floatInVec operator =( const VecIdx& scalar );
+    inline floatInVec operator *=( float scalar );
+    inline floatInVec operator *=( floatInVec scalar );
+    inline floatInVec operator /=( float scalar );
+    inline floatInVec operator /=( floatInVec scalar );
+    inline floatInVec operator +=( float scalar );
+    inline floatInVec operator +=( floatInVec scalar );
+    inline floatInVec operator -=( float scalar );
+    inline floatInVec operator -=( floatInVec scalar );
+};
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_aos.h
index dfa041999..0a7fa1738 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_aos.h
@@ -1,2244 +1,2244 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_CPP_PPU_H
-#define _VECTORMATH_AOS_CPP_PPU_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vecidx_aos.h"
-#include "floatInVec.h"
-#include "boolInVec.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-namespace Vectormath {
-
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A 3-D vector in array-of-structures format
-//
-class Vector3
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( float x, float y, float z );
-
-    // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type)
-    // 
-    inline Vector3( floatInVec x, floatInVec y, floatInVec z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( Point3 pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( float scalar );
-
-    // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Vector3( floatInVec scalar );
-
-    // Set vector float data in a 3-D vector
-    // 
-    explicit inline Vector3( vec_float4 vf4 );
-
-    // Get vector float data from a 3-D vector
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( Vector3 vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( float x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( float y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( float z );
-
-    // Set the x element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setX( floatInVec x );
-
-    // Set the y element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setY( floatInVec y );
-
-    // Set the z element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setZ( floatInVec z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, float value );
-
-    // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setElem( int idx, floatInVec value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( Vector3 vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( Vector3 vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( Point3 pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( float scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( float scalar ) const;
-
-    // Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector3 operator *( floatInVec scalar ) const;
-
-    // Divide a 3-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector3 operator /( floatInVec scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( Vector3 vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( Vector3 vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector3 & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector3 & operator /=( floatInVec scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( float scalar, Vector3 vec );
-
-// Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
-// 
-inline const Vector3 operator *( floatInVec scalar, Vector3 vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( Vector3 vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( Vector3 vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( Vector3 vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( Vector3 vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline const floatInVec maxElem( Vector3 vec );
-
-// Minimum element of a 3-D vector
-// 
-inline const floatInVec minElem( Vector3 vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline const floatInVec sum( Vector3 vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline const floatInVec dot( Vector3 vec0, Vector3 vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline const floatInVec lengthSqr( Vector3 vec );
-
-// Compute the length of a 3-D vector
-// 
-inline const floatInVec length( Vector3 vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( Vector3 vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( Vector3 vec0, Vector3 vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( Vector3 vec0, Vector3 vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// NOTE: 
-// Slower than column post-multiply.
-// 
-inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( Vector3 vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 );
-
-// Linear interpolation between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( floatInVec t, Vector3 vec0, Vector3 vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 );
-
-// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( floatInVec t, Vector3 unitVec0, Vector3 unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 );
-
-// Conditionally select between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, boolInVec select1 );
-
-// Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
-// The value of the fourth word (the word with the highest address) remains unchanged
-// 
-inline void storeXYZ( Vector3 vec, vec_float4 * quad );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads );
-
-// Store four 3-D vectors in three quadwords
-// 
-inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads );
-
-// Store eight 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector3 vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector3 vec, const char * name );
-
-#endif
-
-// A 4-D vector in array-of-structures format
-//
-class Vector4
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( float x, float y, float z, float w );
-
-    // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type)
-    // 
-    inline Vector4( floatInVec x, floatInVec y, floatInVec z, floatInVec w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( Vector3 xyz, float w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4( Vector3 xyz, floatInVec w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( Vector3 vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( Point3 pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( Quat quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( float scalar );
-
-    // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Vector4( floatInVec scalar );
-
-    // Set vector float data in a 4-D vector
-    // 
-    explicit inline Vector4( vec_float4 vf4 );
-
-    // Get vector float data from a 4-D vector
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( Vector4 vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( Vector3 vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( float x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( float y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( float z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( float w );
-
-    // Set the x element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setX( floatInVec x );
-
-    // Set the y element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setY( floatInVec y );
-
-    // Set the z element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setZ( floatInVec z );
-
-    // Set the w element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setW( floatInVec w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline const floatInVec getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, float value );
-
-    // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setElem( int idx, floatInVec value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( Vector4 vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( Vector4 vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( float scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( float scalar ) const;
-
-    // Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector4 operator *( floatInVec scalar ) const;
-
-    // Divide a 4-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector4 operator /( floatInVec scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( Vector4 vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( Vector4 vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4 & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4 & operator /=( floatInVec scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( float scalar, Vector4 vec );
-
-// Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
-// 
-inline const Vector4 operator *( floatInVec scalar, Vector4 vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( Vector4 vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( Vector4 vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( Vector4 vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( Vector4 vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline const floatInVec maxElem( Vector4 vec );
-
-// Minimum element of a 4-D vector
-// 
-inline const floatInVec minElem( Vector4 vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline const floatInVec sum( Vector4 vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline const floatInVec dot( Vector4 vec0, Vector4 vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline const floatInVec lengthSqr( Vector4 vec );
-
-// Compute the length of a 4-D vector
-// 
-inline const floatInVec length( Vector4 vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( Vector4 vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( Vector4 vec0, Vector4 vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 );
-
-// Linear interpolation between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( floatInVec t, Vector4 vec0, Vector4 vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 );
-
-// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( floatInVec t, Vector4 unitVec0, Vector4 unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 );
-
-// Conditionally select between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, boolInVec select1 );
-
-// Store four 4-D vectors as half-floats
-// 
-inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector4 vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector4 vec, const char * name );
-
-#endif
-
-// A 3-D point in array-of-structures format
-//
-class Point3
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( float x, float y, float z );
-
-    // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type)
-    // 
-    inline Point3( floatInVec x, floatInVec y, floatInVec z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( Vector3 vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( float scalar );
-
-    // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Point3( floatInVec scalar );
-
-    // Set vector float data in a 3-D point
-    // 
-    explicit inline Point3( vec_float4 vf4 );
-
-    // Get vector float data from a 3-D point
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( Point3 pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( float x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( float y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( float z );
-
-    // Set the x element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setX( floatInVec x );
-
-    // Set the y element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setY( floatInVec y );
-
-    // Set the z element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setZ( floatInVec z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, float value );
-
-    // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type)
-    // 
-    inline Point3 & setElem( int idx, floatInVec value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( Point3 pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( Vector3 vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( Vector3 vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( Vector3 vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( Vector3 vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( Point3 pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( Point3 pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( Point3 pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( Point3 pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline const floatInVec maxElem( Point3 pnt );
-
-// Minimum element of a 3-D point
-// 
-inline const floatInVec minElem( Point3 pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline const floatInVec sum( Point3 pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( Point3 pnt, float scaleVal );
-
-// Apply uniform scale to a 3-D point (scalar data contained in vector data type)
-// 
-inline const Point3 scale( Point3 pnt, floatInVec scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( Point3 pnt, Vector3 scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline const floatInVec projection( Point3 pnt, Vector3 unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline const floatInVec distSqrFromOrigin( Point3 pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline const floatInVec distFromOrigin( Point3 pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline const floatInVec distSqr( Point3 pnt0, Point3 pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline const floatInVec dist( Point3 pnt0, Point3 pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 );
-
-// Linear interpolation between two 3-D points (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( floatInVec t, Point3 pnt0, Point3 pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 );
-
-// Conditionally select between two 3-D points (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( Point3 pnt0, Point3 pnt1, boolInVec select1 );
-
-// Store x, y, and z elements of a 3-D point in the first three words of a quadword.
-// The value of the fourth word (the word with the highest address) remains unchanged
-// 
-inline void storeXYZ( Point3 pnt, vec_float4 * quad );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads );
-
-// Store four 3-D points in three quadwords
-// 
-inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads );
-
-// Store eight 3-D points as half-floats
-// 
-inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Point3 pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Point3 pnt, const char * name );
-
-#endif
-
-// A quaternion in array-of-structures format
-//
-class Quat
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( float x, float y, float z, float w );
-
-    // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type)
-    // 
-    inline Quat( floatInVec x, floatInVec y, floatInVec z, floatInVec w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( Vector3 xyz, float w );
-
-    // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat( Vector3 xyz, floatInVec w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( Vector4 vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( float scalar );
-
-    // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Quat( floatInVec scalar );
-
-    // Set vector float data in a quaternion
-    // 
-    explicit inline Quat( vec_float4 vf4 );
-
-    // Get vector float data from a quaternion
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( Quat quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( Vector3 vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( float x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( float y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( float z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( float w );
-
-    // Set the x element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setX( floatInVec x );
-
-    // Set the y element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setY( floatInVec y );
-
-    // Set the z element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setZ( floatInVec z );
-
-    // Set the w element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setW( floatInVec w );
-
-    // Get the x element of a quaternion
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline const floatInVec getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, float value );
-
-    // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type)
-    // 
-    inline Quat & setElem( int idx, floatInVec value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( Quat quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( Quat quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( Quat quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( float scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( float scalar ) const;
-
-    // Multiply a quaternion by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Quat operator *( floatInVec scalar ) const;
-
-    // Divide a quaternion by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Quat operator /( floatInVec scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( Quat quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( Quat quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( Quat quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat & operator /=( floatInVec scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( Vector3 unitVec0, Vector3 unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( float radians, Vector3 unitVec );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotation( floatInVec radians, Vector3 unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( float radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( float radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( float radians );
-
-    // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationX( floatInVec radians );
-
-    // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationY( floatInVec radians );
-
-    // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationZ( floatInVec radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( float scalar, Quat quat );
-
-// Multiply a quaternion by a scalar (scalar data contained in vector data type)
-// 
-inline const Quat operator *( floatInVec scalar, Quat quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( Quat quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( Quat unitQuat, Vector3 vec );
-
-// Compute the dot product of two quaternions
-// 
-inline const floatInVec dot( Quat quat0, Quat quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline const floatInVec norm( Quat quat );
-
-// Compute the length of a quaternion
-// 
-inline const floatInVec length( Quat quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( Quat quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( float t, Quat quat0, Quat quat1 );
-
-// Linear interpolation between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( floatInVec t, Quat quat0, Quat quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 );
-
-// Spherical linear interpolation between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( floatInVec t, Quat unitQuat0, Quat unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
-
-// Spherical quadrangle interpolation (scalar data contained in vector data type)
-// 
-inline const Quat squad( floatInVec t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Quat select( Quat quat0, Quat quat1, bool select1 );
-
-// Conditionally select between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Quat select( Quat quat0, Quat quat1, boolInVec select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Quat quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Quat quat, const char * name );
-
-#endif
-
-// A 3x3 matrix in array-of-structures format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( Vector3 col0, Vector3 col1, Vector3 col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( Quat unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( float scalar );
-
-    // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Matrix3( floatInVec scalar );
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( Vector3 col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( Vector3 col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( Vector3 col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, Vector3 vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, Vector3 vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, float val );
-
-    // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Matrix3 & setElem( int col, int row, floatInVec val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( float scalar ) const;
-
-    // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Matrix3 operator *( floatInVec scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( Vector3 vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Matrix3 & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( float radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( float radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( float radians );
-
-    // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationX( floatInVec radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationY( floatInVec radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationZ( floatInVec radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( float radians, Vector3 unitVec );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotation( floatInVec radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( Quat unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( Vector3 scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
-
-// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
-// 
-inline const Matrix3 operator *( floatInVec scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline const floatInVec determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
-
-// Conditionally select between two 3x3 matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, boolInVec select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A 4x4 matrix in array-of-structures format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( Vector4 col0, Vector4 col1, Vector4 col2, Vector4 col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, Vector3 translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( Quat unitQuat, Vector3 translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( float scalar );
-
-    // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Matrix4( floatInVec scalar );
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( Vector3 translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( Vector4 col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( Vector4 col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( Vector4 col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( Vector4 col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, Vector4 vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, Vector4 vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, float val );
-
-    // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Matrix4 & setElem( int col, int row, floatInVec val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( float scalar ) const;
-
-    // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Matrix4 operator *( floatInVec scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( Vector4 vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( Vector3 vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( Point3 pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Matrix4 & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( float radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( float radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( float radians );
-
-    // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationX( floatInVec radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationY( floatInVec radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationZ( floatInVec radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( float radians, Vector3 unitVec );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotation( floatInVec radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( Quat unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( Vector3 scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( Vector3 translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
-
-// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
-// 
-inline const Matrix4 operator *( floatInVec scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline const floatInVec determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
-
-// Conditionally select between two 4x4 matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, boolInVec select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A 3x4 transformation matrix in array-of-structures format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( Vector3 col0, Vector3 col1, Vector3 col2, Vector3 col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, Vector3 translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( Quat unitQuat, Vector3 translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( float scalar );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Transform3( floatInVec scalar );
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( Vector3 translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( Vector3 col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( Vector3 col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( Vector3 col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( Vector3 col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, Vector3 vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, Vector4 vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, float val );
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Transform3 & setElem( int col, int row, floatInVec val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( Vector3 vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( Point3 pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationX( floatInVec radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationY( floatInVec radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationZ( floatInVec radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( float radians, Vector3 unitVec );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotation( floatInVec radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( Quat unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( Vector3 scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( Vector3 translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
-
-// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, boolInVec select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_PPU_H
+#define _VECTORMATH_AOS_CPP_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vecidx_aos.h"
+#include "floatInVec.h"
+#include "boolInVec.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// (its only data member is one vec_float4, mVec128, private by default)
+class Vector3
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: mVec128 is not set here and must be assigned before it is read)
+    inline Vector3( ) { };
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    inline Vector3( floatInVec x, floatInVec y, floatInVec z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // explicit: a Point3 is never converted to a Vector3 implicitly
+    explicit inline Vector3( Point3 pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // explicit: a float is never converted to a Vector3 implicitly
+    explicit inline Vector3( float scalar );
+
+    // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type)
+    // explicit: a floatInVec is never converted to a Vector3 implicitly
+    explicit inline Vector3( floatInVec scalar );
+
+    // Set vector float data in a 3-D vector
+    // explicit: a vec_float4 is never converted to a Vector3 implicitly
+    explicit inline Vector3( vec_float4 vf4 );
+
+    // Get vector float data from a 3-D vector
+    // Returns a vec_float4 by value; const member, so the vector is not modified
+    inline vec_float4 get128( ) const;
+
+    // Assign one 3-D vector to another
+    // Takes its argument by value; returns Vector3 & (presumably *this; definition not shown here)
+    inline Vector3 & operator =( Vector3 vec );
+
+    // Set the x element of a 3-D vector
+    // Returns Vector3 & (presumably *this, so setter calls can be chained; definition not shown here)
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( float z );
+
+    // Set the x element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setX( floatInVec x );
+
+    // Set the y element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setY( floatInVec y );
+
+    // Set the z element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setZ( floatInVec z );
+
+    // Get the x element of a 3-D vector
+    // The result is a floatInVec returned by value, not a plain float
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): idx is presumably 0, 1, or 2; no range check is visible in this declaration
+    inline Vector3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setElem( int idx, floatInVec value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // NOTE(review): idx is presumably 0, 1, or 2; no range check is visible in this declaration
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object by value rather than a float & (VecIdx presumably comes from vecidx_aos.h)
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: the element is returned by value as a floatInVec
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // Const member; the result is returned as a new Vector3
+    inline const Vector3 operator +( Vector3 vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( Vector3 vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3, not Vector3
+    inline const Point3 operator +( Point3 pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector3 operator *( floatInVec scalar ) const;
+
+    // Divide a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector3 operator /( floatInVec scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Returns Vector3 & (presumably *this; definition not shown here)
+    inline Vector3 & operator +=( Vector3 vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( Vector3 vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector3 & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector3 & operator /=( floatInVec scalar );
+
+    // Negate all elements of a 3-D vector
+    // Const member; the negated vector is returned as a new Vector3
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // Static member, called as Vector3::xAxis( ); presumably (1, 0, 0), definition not shown here
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // Static member, called as Vector3::yAxis( ); presumably (0, 1, 0), definition not shown here
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // Static member, called as Vector3::zAxis( ); presumably (0, 0, 1), definition not shown here
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// Non-member overload, so the scalar can be written on the left-hand side
+inline const Vector3 operator *( float scalar, Vector3 vec );
+
+// Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+// Non-member overload, so the scalar can be written on the left-hand side
+inline const Vector3 operator *( floatInVec scalar, Vector3 vec );
+
+// Multiply two 3-D vectors per element
+// Both operands are passed by value; the result is returned as a new Vector3
+inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( Vector3 vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( Vector3 vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( Vector3 vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( Vector3 vec );
+
+// Copy sign from one 3-D vector to another, per element
+// NOTE(review): presumably the magnitude of vec0 with the sign of vec1; confirm against the definition
+inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Maximum element of a 3-D vector
+// The result is a floatInVec returned by value
+inline const floatInVec maxElem( Vector3 vec );
+
+// Minimum element of a 3-D vector
+// The result is a floatInVec returned by value
+inline const floatInVec minElem( Vector3 vec );
+
+// Compute the sum of all elements of a 3-D vector
+// (presumably x + y + z, the three elements Vector3 exposes; definition not shown here)
+inline const floatInVec sum( Vector3 vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline const floatInVec dot( Vector3 vec0, Vector3 vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline const floatInVec lengthSqr( Vector3 vec );
+
+// Compute the length of a 3-D vector
+// 
+inline const floatInVec length( Vector3 vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( Vector3 vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( Vector3 vec0, Vector3 vec1 );
+
+// Outer product of two 3-D vectors
+// Returns a Matrix3, which is only forward-declared at this point in the file
+inline const Matrix3 outer( Vector3 vec0, Vector3 vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// NOTE: 
+// Slower than column post-multiply.
+// 
+inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// NOTE(review): presumably the matrix M for which M * v equals cross( vec, v ); confirm against the definition
+inline const Matrix3 crossMatrix( Vector3 vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 );
+
+// Linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( floatInVec t, Vector3 vec0, Vector3 vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 );
+
+// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( floatInVec t, Vector3 unitVec0, Vector3 unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 );
+
+// Conditionally select between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, boolInVec select1 );
+
+// Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+// The value of the fourth word (the word with the highest address) remains unchanged
+// quad is the output: the quadword it points to is written through a non-const pointer
+inline void storeXYZ( Vector3 vec, vec_float4 * quad );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// vec0..vec3 are outputs (non-const references); threeQuads is taken as a pointer to const
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads );
+
+// Store four 3-D vectors in three quadwords
+// threeQuads is the output (non-const pointer); vec0..vec3 are passed by value
+inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads );
+
+// Store eight 3-D vectors as half-floats
+// threeQuads (vec_ushort8 *) is the output; the half-float layout is not described in this section
+inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector3 vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector3 vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// (its only data member is one vec_float4, mVec128, private by default)
+class Vector4
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: mVec128 is not set here and must be assigned before it is read)
+    inline Vector4( ) { };
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type)
+    // 
+    inline Vector4( floatInVec x, floatInVec y, floatInVec z, floatInVec w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // Not explicit, unlike the single-argument constructors below
+    inline Vector4( Vector3 xyz, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector4( Vector3 xyz, floatInVec w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // explicit: a Vector3 is never converted to a Vector4 implicitly
+    explicit inline Vector4( Vector3 vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // explicit: a Point3 is never converted to a Vector4 implicitly
+    explicit inline Vector4( Point3 pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // explicit: a Quat is never converted to a Vector4 implicitly
+    explicit inline Vector4( Quat quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // explicit: a float is never converted to a Vector4 implicitly
+    explicit inline Vector4( float scalar );
+
+    // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type)
+    // explicit: a floatInVec is never converted to a Vector4 implicitly
+    explicit inline Vector4( floatInVec scalar );
+
+    // Set vector float data in a 4-D vector
+    // explicit: a vec_float4 is never converted to a Vector4 implicitly
+    explicit inline Vector4( vec_float4 vf4 );
+
+    // Get vector float data from a 4-D vector
+    // Returns a vec_float4 by value; const member, so the vector is not modified
+    inline vec_float4 get128( ) const;
+
+    // Assign one 4-D vector to another
+    // Takes its argument by value; returns Vector4 & (presumably *this; definition not shown here)
+    inline Vector4 & operator =( Vector4 vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( Vector3 vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // The result is a Vector3 returned by value
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // Returns Vector4 & (presumably *this, so setter calls can be chained; definition not shown here)
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( float w );
+
+    // Set the x element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setX( floatInVec x );
+
+    // Set the y element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setY( floatInVec y );
+
+    // Set the z element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setZ( floatInVec z );
+
+    // Set the w element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setW( floatInVec w );
+
+    // Get the x element of a 4-D vector
+    // The result is a floatInVec returned by value, not a plain float
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): idx is presumably 0 through 3; no range check is visible in this declaration
+    inline Vector4 & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type)
+    // 
+    inline Vector4 & setElem( int idx, floatInVec value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): idx is presumably 0 through 3; no range check is visible in this declaration
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object by value rather than a float & (VecIdx presumably comes from vecidx_aos.h)
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: the element is returned by value as a floatInVec
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // Const member; the result is returned as a new Vector4
+    inline const Vector4 operator +( Vector4 vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( Vector4 vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector4 operator *( floatInVec scalar ) const;
+
+    // Divide a 4-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector4 operator /( floatInVec scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // Returns Vector4 & (presumably *this; definition not shown here)
+    inline Vector4 & operator +=( Vector4 vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( Vector4 vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector4 & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector4 & operator /=( floatInVec scalar );
+
+    // Negate all elements of a 4-D vector
+    // Const member; the negated vector is returned as a new Vector4
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // Static member, called as Vector4::xAxis( ); presumably (1, 0, 0, 0), definition not shown here
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // Static member, called as Vector4::yAxis( ); presumably (0, 1, 0, 0), definition not shown here
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // Static member, called as Vector4::zAxis( ); presumably (0, 0, 1, 0), definition not shown here
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // Static member, called as Vector4::wAxis( ); presumably (0, 0, 0, 1), definition not shown here
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// Non-member overload, so the scalar can be written on the left-hand side
+inline const Vector4 operator *( float scalar, Vector4 vec );
+
+// Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+// Non-member overload, so the scalar can be written on the left-hand side
+inline const Vector4 operator *( floatInVec scalar, Vector4 vec );
+
+// Multiply two 4-D vectors per element
+// Both operands are passed by value; the result is returned as a new Vector4
+inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( Vector4 vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( Vector4 vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( Vector4 vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( Vector4 vec );
+
+// Copy sign from one 4-D vector to another, per element
+// NOTE(review): presumably the magnitude of vec0 with the sign of vec1; confirm against the definition
+inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Maximum element of a 4-D vector
+// The result is a floatInVec returned by value
+inline const floatInVec maxElem( Vector4 vec );
+
+// Minimum element of a 4-D vector
+// The result is a floatInVec returned by value
+inline const floatInVec minElem( Vector4 vec );
+
+// Compute the sum of all elements of a 4-D vector
+// (presumably x + y + z + w, the four elements Vector4 exposes; definition not shown here)
+inline const floatInVec sum( Vector4 vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline const floatInVec dot( Vector4 vec0, Vector4 vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline const floatInVec lengthSqr( Vector4 vec );
+
+// Compute the length of a 4-D vector
+// 
+inline const floatInVec length( Vector4 vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( Vector4 vec );
+
+// Outer product of two 4-D vectors
+// Returns a Matrix4, which is only forward-declared at this point in the file
+inline const Matrix4 outer( Vector4 vec0, Vector4 vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 );
+
+// Linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( floatInVec t, Vector4 vec0, Vector4 vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 );
+
+// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( floatInVec t, Vector4 unitVec0, Vector4 unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 );
+
+// Conditionally select between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, boolInVec select1 );
+
+// Store four 4-D vectors as half-floats
+// twoQuads (vec_ushort8 *) is the output; the half-float layout is not described in this section
+inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector4 vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector4 vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// (its only data member is one vec_float4, mVec128, private by default)
+class Point3
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: mVec128 is not set here and must be assigned before it is read)
+    inline Point3( ) { };
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( float x, float y, float z );
+
+    // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    inline Point3( floatInVec x, floatInVec y, floatInVec z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // explicit: a Vector3 is never converted to a Point3 implicitly
+    explicit inline Point3( Vector3 vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // explicit: a float is never converted to a Point3 implicitly
+    explicit inline Point3( float scalar );
+
+    // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type)
+    // explicit: a floatInVec is never converted to a Point3 implicitly
+    explicit inline Point3( floatInVec scalar );
+
+    // Set vector float data in a 3-D point
+    // explicit: a vec_float4 is never converted to a Point3 implicitly
+    explicit inline Point3( vec_float4 vf4 );
+
+    // Get vector float data from a 3-D point
+    // Returns a vec_float4 by value; const member, so the point is not modified
+    inline vec_float4 get128( ) const;
+
+    // Assign one 3-D point to another
+    // Takes its argument by value; returns Point3 & (presumably *this; definition not shown here)
+    inline Point3 & operator =( Point3 pnt );
+
+    // Set the x element of a 3-D point
+    // Returns Point3 & (presumably *this, so setter calls can be chained; definition not shown here)
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( float z );
+
+    // Set the x element of a 3-D point (scalar data contained in vector data type)
+    // 
+    inline Point3 & setX( floatInVec x );
+
+    // Set the y element of a 3-D point (scalar data contained in vector data type)
+    // 
+    inline Point3 & setY( floatInVec y );
+
+    // Set the z element of a 3-D point (scalar data contained in vector data type)
+    // 
+    inline Point3 & setZ( floatInVec z );
+
+    // Get the x element of a 3-D point
+    // The result is a floatInVec returned by value, not a plain float
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): idx is presumably 0, 1, or 2; no range check is visible in this declaration
+    inline Point3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type)
+    // 
+    inline Point3 & setElem( int idx, floatInVec value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // NOTE(review): idx is presumably 0, 1, or 2; no range check is visible in this declaration
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object by value rather than a float & (VecIdx presumably comes from vecidx_aos.h)
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: the element is returned by value as a floatInVec
+    inline const floatInVec operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result type is Vector3, not Point3
+    inline const Vector3 operator -( Point3 pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // The result type is Point3
+    inline const Point3 operator +( Vector3 vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result type is Point3
+    inline const Point3 operator -( Vector3 vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Returns Point3 & (presumably *this; definition not shown here)
+    inline Point3 & operator +=( Vector3 vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // Returns Point3 & (presumably *this; definition not shown here)
+    inline Point3 & operator -=( Vector3 vec );
+
+};
+
+// Multiply two 3-D points per element
+// Both operands are passed by value; the result is returned as a new Point3
+inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( Point3 pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( Point3 pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( Point3 pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( Point3 pnt );
+
+// Copy sign from one 3-D point to another, per element
+// NOTE(review): presumably the magnitude of pnt0 with the sign of pnt1; confirm against the definition
+inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Maximum element of a 3-D point
+// The result is a floatInVec returned by value
+inline const floatInVec maxElem( Point3 pnt );
+
+// Minimum element of a 3-D point
+// The result is a floatInVec returned by value
+inline const floatInVec minElem( Point3 pnt );
+
+// Compute the sum of all elements of a 3-D point
+// (presumably x + y + z, the three elements Point3 exposes; definition not shown here)
+inline const floatInVec sum( Point3 pnt );
+
+// Apply uniform scale to a 3-D point
+// The scaled point is returned as a new Point3; pnt is passed by value
+inline const Point3 scale( Point3 pnt, float scaleVal );
+
+// Apply uniform scale to a 3-D point (scalar data contained in vector data type)
+// 
+inline const Point3 scale( Point3 pnt, floatInVec scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// NOTE(review): presumably each element of pnt is multiplied by the matching element of scaleVec; confirm
+inline const Point3 scale( Point3 pnt, Vector3 scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// unitVec is expected to be unit length already; nothing in this declaration normalizes it
+inline const floatInVec projection( Point3 pnt, Vector3 unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline const floatInVec distSqrFromOrigin( Point3 pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline const floatInVec distFromOrigin( Point3 pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline const floatInVec distSqr( Point3 pnt0, Point3 pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline const floatInVec dist( Point3 pnt0, Point3 pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 );
+
+// Linear interpolation between two 3-D points (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( floatInVec t, Point3 pnt0, Point3 pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 );
+
+// Conditionally select between two 3-D points (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Point3 select( Point3 pnt0, Point3 pnt1, boolInVec select1 );
+
+// Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+// The value of the fourth word (the word with the highest address) remains unchanged
+// quad is the output: the quadword it points to is written through a non-const pointer
+inline void storeXYZ( Point3 pnt, vec_float4 * quad );
+
+// Load four three-float 3-D points, stored in three quadwords
+// pnt0..pnt3 are outputs (non-const references); threeQuads is taken as a pointer to const
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads );
+
+// Store four 3-D points in three quadwords
+// threeQuads is the output (non-const pointer); pnt0..pnt3 are passed by value
+inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads );
+
+// Store eight 3-D points as half-floats
+// threeQuads (vec_ushort8 *) is the output; the half-float layout is not described in this section
+inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Point3 pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Point3 pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+//
+class Quat
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Quat( ) { };
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type)
+    // 
+    inline Quat( floatInVec x, floatInVec y, floatInVec z, floatInVec w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // 
+    inline Quat( Vector3 xyz, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type)
+    // 
+    inline Quat( Vector3 xyz, floatInVec w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // 
+    explicit inline Quat( Vector4 vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // 
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // 
+    explicit inline Quat( float scalar );
+
+    // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit inline Quat( floatInVec scalar );
+
+    // Set vector float data in a quaternion
+    // 
+    explicit inline Quat( vec_float4 vf4 );
+
+    // Get vector float data from a quaternion
+    // 
+    inline vec_float4 get128( ) const;
+
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( Quat quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( Vector3 vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // 
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( float w );
+
+    // Set the x element of a quaternion (scalar data contained in vector data type)
+    // 
+    inline Quat & setX( floatInVec x );
+
+    // Set the y element of a quaternion (scalar data contained in vector data type)
+    // 
+    inline Quat & setY( floatInVec y );
+
+    // Set the z element of a quaternion (scalar data contained in vector data type)
+    // 
+    inline Quat & setZ( floatInVec z );
+
+    // Set the w element of a quaternion (scalar data contained in vector data type)
+    // 
+    inline Quat & setW( floatInVec w );
+
+    // Get the x element of a quaternion
+    // 
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // 
+    inline Quat & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type)
+    // 
+    inline Quat & setElem( int idx, floatInVec value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // 
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // 
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( Quat quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( Quat quat ) const;
+
+    // Multiply two quaternions
+    // 
+    inline const Quat operator *( Quat quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( float scalar ) const;
+
+    // Multiply a quaternion by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Quat operator *( floatInVec scalar ) const;
+
+    // Divide a quaternion by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Quat operator /( floatInVec scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( Quat quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( Quat quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( Quat quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Quat & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    inline Quat & operator /=( floatInVec scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( Vector3 unitVec0, Vector3 unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // 
+    static inline const Quat rotation( float radians, Vector3 unitVec );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static inline const Quat rotation( floatInVec radians, Vector3 unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( float radians );
+
+    // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static inline const Quat rotationX( floatInVec radians );
+
+    // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static inline const Quat rotationY( floatInVec radians );
+
+    // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static inline const Quat rotationZ( floatInVec radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// Non-member overload: the scalar is the left-hand operand (scalar * quat).
+inline const Quat operator *( float scalar, Quat quat );
+
+// Multiply a quaternion by a scalar (scalar data contained in vector data type)
+// Non-member overload: the scalar is the left-hand operand (scalar * quat).
+inline const Quat operator *( floatInVec scalar, Quat quat );
+
+// Compute the conjugate of a quaternion
+//
+inline const Quat conj( Quat quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+//
+inline const Vector3 rotate( Quat unitQuat, Vector3 vec );
+
+// Compute the dot product of two quaternions
+//
+inline const floatInVec dot( Quat quat0, Quat quat1 );
+
+// Compute the norm of a quaternion
+// NOTE(review): presumably the squared length, since length() is declared separately - confirm.
+inline const floatInVec norm( Quat quat );
+
+// Compute the length of a quaternion
+//
+inline const floatInVec length( Quat quat );
+
+// Normalize a quaternion
+// NOTE:
+// The result is unpredictable when all elements of quat are at or near zero.
+//
+inline const Quat normalize( Quat quat );
+
+// Linear interpolation between two quaternions
+// NOTE:
+// Does not clamp t between 0 and 1.
+//
+inline const Quat lerp( float t, Quat quat0, Quat quat1 );
+
+// Linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE:
+// Does not clamp t between 0 and 1.
+//
+inline const Quat lerp( floatInVec t, Quat quat0, Quat quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE:
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+//
+inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 );
+
+// Spherical linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE:
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+//
+inline const Quat slerp( floatInVec t, Quat unitQuat0, Quat unitQuat1 );
+
+// Spherical quadrangle interpolation
+// NOTE(review): all four quaternions are presumably expected to be unit length - confirm.
+inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
+
+// Spherical quadrangle interpolation (scalar data contained in vector data type)
+// NOTE(review): all four quaternions are presumably expected to be unit length - confirm.
+inline const Quat squad( floatInVec t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields quat1 when select1 is true, otherwise quat0 - confirm.
+inline const Quat select( Quat quat0, Quat quat1, bool select1 );
+
+// Conditionally select between two quaternions (scalar data contained in vector data type)
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields quat1 when select1 is true, otherwise quat0 - confirm.
+inline const Quat select( Quat quat0, Quat quat1, boolInVec select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( Quat quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( Quat quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// Stored as three Vector3 columns: mCol0, mCol1, and mCol2.
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // The ';' after the empty body is redundant (harmless, but may trigger extra-semicolon warnings).
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    //
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    //
+    inline Matrix3( Vector3 col0, Vector3 col1, Vector3 col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // Declared explicit, so a Quat is not implicitly converted to a Matrix3.
+    explicit inline Matrix3( Quat unitQuat );
+
+    // Construct a 3x3 matrix with all elements set to the same scalar value
+    // Declared explicit, so a float is not implicitly converted to a Matrix3.
+    explicit inline Matrix3( float scalar );
+
+    // Construct a 3x3 matrix with all elements set to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is not implicitly converted to a Matrix3.
+    explicit inline Matrix3( floatInVec scalar );
+
+    // Assign one 3x3 matrix to another
+    //
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    //
+    inline Matrix3 & setCol0( Vector3 col0 );
+
+    // Set column 1 of a 3x3 matrix
+    //
+    inline Matrix3 & setCol1( Vector3 col1 );
+
+    // Set column 2 of a 3x3 matrix
+    //
+    inline Matrix3 & setCol2( Vector3 col2 );
+
+    // Get column 0 of a 3x3 matrix
+    //
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    //
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    //
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    //
+    inline Matrix3 & setCol( int col, Vector3 vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    //
+    inline Matrix3 & setRow( int row, Vector3 vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    //
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    //
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a Vector3 reference, so the column can be modified through the result.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Note the argument order: column index first, then row index.
+    inline Matrix3 & setElem( int col, int row, floatInVec val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    //
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    //
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    //
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    //
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+    //
+    inline const Matrix3 operator *( floatInVec scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    //
+    inline const Vector3 operator *( Vector3 vec ) const;
+
+    // Multiply two 3x3 matrices
+    //
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    //
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    //
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    //
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    //
+    inline Matrix3 & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    //
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    //
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    //
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    //
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    //
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix3 rotationX( floatInVec radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix3 rotationY( floatInVec radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix3 rotationZ( floatInVec radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, and z angles in radians - confirm.
+    static inline const Matrix3 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    //
+    static inline const Matrix3 rotation( float radians, Vector3 unitVec );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    //
+    static inline const Matrix3 rotation( floatInVec radians, Vector3 unitVec );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    //
+    static inline const Matrix3 rotation( Quat unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    //
+    static inline const Matrix3 scale( Vector3 scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// Non-member overload: the scalar is the left-hand operand (scalar * mat).
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+// Non-member overload: the scalar is the left-hand operand (scalar * mat).
+inline const Matrix3 operator *( floatInVec scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+//
+inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+//
+inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+//
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+//
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+//
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE:
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+//
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+//
+inline const floatInVec determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields mat1 when select1 is true, otherwise mat0 - confirm.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+// Conditionally select between two 3x3 matrices (scalar data contained in vector data type)
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields mat1 when select1 is true, otherwise mat0 - confirm.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, boolInVec select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// Stored as four Vector4 columns: mCol0, mCol1, mCol2, and mCol3.
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // The ';' after the empty body is redundant (harmless, but may trigger extra-semicolon warnings).
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    //
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    //
+    inline Matrix4( Vector4 col0, Vector4 col1, Vector4 col2, Vector4 col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Transform3 is defined after this class, so it must already be forward-declared here.
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    //
+    inline Matrix4( const Matrix3 & mat, Vector3 translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    //
+    inline Matrix4( Quat unitQuat, Vector3 translateVec );
+
+    // Construct a 4x4 matrix with all elements set to the same scalar value
+    // Declared explicit, so a float is not implicitly converted to a Matrix4.
+    explicit inline Matrix4( float scalar );
+
+    // Construct a 4x4 matrix with all elements set to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is not implicitly converted to a Matrix4.
+    explicit inline Matrix4( floatInVec scalar );
+
+    // Assign one 4x4 matrix to another
+    //
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE:
+    // This function does not change the bottom row elements.
+    //
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    //
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE:
+    // This function does not change the bottom row elements.
+    //
+    inline Matrix4 & setTranslation( Vector3 translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    //
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol0( Vector4 col0 );
+
+    // Set column 1 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol1( Vector4 col1 );
+
+    // Set column 2 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol2( Vector4 col2 );
+
+    // Set column 3 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol3( Vector4 col3 );
+
+    // Get column 0 of a 4x4 matrix
+    //
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    //
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    //
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    //
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    //
+    inline Matrix4 & setCol( int col, Vector4 vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    //
+    inline Matrix4 & setRow( int row, Vector4 vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    //
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    //
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a Vector4 reference, so the column can be modified through the result.
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value.
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Note the argument order: column index first, then row index.
+    inline Matrix4 & setElem( int col, int row, floatInVec val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    //
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    //
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    //
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    //
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+    //
+    inline const Matrix4 operator *( floatInVec scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    //
+    inline const Vector4 operator *( Vector4 vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // The result is a Vector4, not a Vector3.
+    inline const Vector4 operator *( Vector3 vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // The result is a Vector4, not a Point3.
+    inline const Vector4 operator *( Point3 pnt ) const;
+
+    // Multiply two 4x4 matrices
+    //
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    //
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    //
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    //
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    //
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    //
+    inline Matrix4 & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    //
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    //
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    //
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    //
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    //
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    //
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix4 rotationX( floatInVec radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix4 rotationY( floatInVec radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type)
+    //
+    static inline const Matrix4 rotationZ( floatInVec radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, and z angles in radians - confirm.
+    static inline const Matrix4 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    //
+    static inline const Matrix4 rotation( float radians, Vector3 unitVec );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    //
+    static inline const Matrix4 rotation( floatInVec radians, Vector3 unitVec );
+
+    // Construct a 4x4 rotation matrix from a unit-length quaternion
+    //
+    static inline const Matrix4 rotation( Quat unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    //
+    static inline const Matrix4 scale( Vector3 scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    //
+    static inline const Matrix4 translation( Vector3 translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    //
+    static inline const Matrix4 lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec );
+
+    // Construct a perspective projection matrix
+    // NOTE(review): fovyRadians is presumably the vertical field of view, in radians - confirm.
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    //
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    //
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// Non-member overload: the scalar is the left-hand operand (scalar * mat).
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+// Non-member overload: the scalar is the left-hand operand (scalar * mat).
+inline const Matrix4 operator *( floatInVec scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+//
+inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+//
+inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+//
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+//
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+//
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE:
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+//
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE:
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+//
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE:
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+//
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+//
+inline const floatInVec determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields mat1 when select1 is true, otherwise mat0 - confirm.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+// Conditionally select between two 4x4 matrices (scalar data contained in vector data type)
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields mat1 when select1 is true, otherwise mat0 - confirm.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, boolInVec select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+//
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( Vector3 col0, Vector3 col1, Vector3 col2, Vector3 col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Transform3( const Matrix3 & tfrm, Vector3 translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( Quat unitQuat, Vector3 translateVec );
+
+    // Construct a 3x4 transformation matrix with all elements set to the same scalar value
+    // 
+    explicit inline Transform3( float scalar );
+
+    // Construct a 3x4 transformation matrix with all elements set to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit inline Transform3( floatInVec scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( Vector3 translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( Vector3 col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( Vector3 col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( Vector3 col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( Vector3 col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setCol( int col, Vector3 vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setRow( int row, Vector4 vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type)
+    // 
+    inline Transform3 & setElem( int col, int row, floatInVec val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( Vector3 vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // 
+    inline const Point3 operator *( Point3 pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationX( floatInVec radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationY( floatInVec radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationZ( floatInVec radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Transform3 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( float radians, Vector3 unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotation( floatInVec radians, Vector3 unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( Quat unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( Vector3 scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( Vector3 translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, boolInVec select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_soa.h
index 4b99c3235..65932d803 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_soa.h
@@ -1,1919 +1,1919 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_CPP_PPU_H
-#define _VECTORMATH_SOA_CPP_PPU_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vectormath_aos.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-namespace Vectormath {
-
-namespace Soa {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A set of four 3-D vectors in structure-of-arrays format
-//
-class Vector3
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Copy a 3-D vector
-    // 
-    inline Vector3( const Vector3 & vec );
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( vec_float4 x, vec_float4 y, vec_float4 z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( const Point3 & pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( vec_float4 scalar );
-
-    // Replicate an AoS 3-D vector
-    // 
-    inline Vector3( Aos::Vector3 vec );
-
-    // Insert four AoS 3-D vectors
-    // 
-    inline Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 );
-
-    // Extract four AoS 3-D vectors
-    // 
-    inline void get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( const Vector3 & vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( vec_float4 x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( vec_float4 y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( vec_float4 z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( const Vector3 & vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( const Point3 & pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( vec_float4 scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( const Vector3 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( const Vector3 & vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( const Vector3 & vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( const Vector3 & vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( const Vector3 & vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline vec_float4 maxElem( const Vector3 & vec );
-
-// Minimum element of a 3-D vector
-// 
-inline vec_float4 minElem( const Vector3 & vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline vec_float4 sum( const Vector3 & vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline vec_float4 lengthSqr( const Vector3 & vec );
-
-// Compute the length of a 3-D vector
-// 
-inline vec_float4 length( const Vector3 & vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( const Vector3 & vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// 
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( const Vector3 & vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads );
-
-// Store four slots of an SoA 3-D vector in three quadwords
-// 
-inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads );
-
-// Store eight slots of two SoA 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec, const char * name );
-
-#endif
-
-// A set of four 4-D vectors in structure-of-arrays format
-//
-class Vector4
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-    vec_float4 mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Copy a 4-D vector
-    // 
-    inline Vector4( const Vector4 & vec );
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( const Vector3 & xyz, vec_float4 w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( const Vector3 & vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( const Point3 & pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( const Quat & quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( vec_float4 scalar );
-
-    // Replicate an AoS 4-D vector
-    // 
-    inline Vector4( Aos::Vector4 vec );
-
-    // Insert four AoS 4-D vectors
-    // 
-    inline Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 );
-
-    // Extract four AoS 4-D vectors
-    // 
-    inline void get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( const Vector4 & vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( vec_float4 x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( vec_float4 y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( vec_float4 z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( vec_float4 w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline vec_float4 getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( const Vector4 & vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( const Vector4 & vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( vec_float4 scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( const Vector4 & vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( const Vector4 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( const Vector4 & vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( const Vector4 & vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( const Vector4 & vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( const Vector4 & vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline vec_float4 maxElem( const Vector4 & vec );
-
-// Minimum element of a 4-D vector
-// 
-inline vec_float4 minElem( const Vector4 & vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline vec_float4 sum( const Vector4 & vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline vec_float4 lengthSqr( const Vector4 & vec );
-
-// Compute the length of a 4-D vector
-// 
-inline vec_float4 length( const Vector4 & vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( const Vector4 & vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 );
-
-// Store four slots of an SoA 4-D vector as half-floats
-// 
-inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec, const char * name );
-
-#endif
-
-// A set of four 3-D points in structure-of-arrays format
-//
-class Point3
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Copy a 3-D point
-    // 
-    inline Point3( const Point3 & pnt );
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( vec_float4 x, vec_float4 y, vec_float4 z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( const Vector3 & vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( vec_float4 scalar );
-
-    // Replicate an AoS 3-D point
-    // 
-    inline Point3( Aos::Point3 pnt );
-
-    // Insert four AoS 3-D points
-    // 
-    inline Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 );
-
-    // Extract four AoS 3-D points
-    // 
-    inline void get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( const Point3 & pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( vec_float4 x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( vec_float4 y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( vec_float4 z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( const Point3 & pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( const Vector3 & vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( const Vector3 & vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( const Point3 & pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( const Point3 & pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( const Point3 & pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( const Point3 & pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline vec_float4 maxElem( const Point3 & pnt );
-
-// Minimum element of a 3-D point
-// 
-inline vec_float4 minElem( const Point3 & pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline vec_float4 sum( const Point3 & pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline vec_float4 distSqrFromOrigin( const Point3 & pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline vec_float4 distFromOrigin( const Point3 & pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt, const vec_float4 * threeQuads );
-
-// Store four slots of an SoA 3-D point in three quadwords
-// 
-inline void storeXYZArray( const Point3 & pnt, vec_float4 * threeQuads );
-
-// Store eight slots of two SoA 3-D points as half-floats
-// 
-inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt, const char * name );
-
-#endif
-
-// A set of four quaternions in structure-of-arrays format
-//
-class Quat
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-    vec_float4 mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Copy a quaternion
-    // 
-    inline Quat( const Quat & quat );
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( const Vector3 & xyz, vec_float4 w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( const Vector4 & vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( vec_float4 scalar );
-
-    // Replicate an AoS quaternion
-    // 
-    inline Quat( Aos::Quat quat );
-
-    // Insert four AoS quaternions
-    // 
-    inline Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 );
-
-    // Extract four AoS quaternions
-    // 
-    inline void get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( const Quat & quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( vec_float4 x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( vec_float4 y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( vec_float4 z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( vec_float4 w );
-
-    // Get the x element of a quaternion
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline vec_float4 getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( const Quat & quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( const Quat & quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( const Quat & quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( vec_float4 scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( const Quat & quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( vec_float4 radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( vec_float4 radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( vec_float4 radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( vec_float4 scalar, const Quat & quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( const Quat & quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
-
-// Compute the dot product of two quaternions
-// 
-inline vec_float4 dot( const Quat & quat0, const Quat & quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline vec_float4 norm( const Quat & quat );
-
-// Compute the length of a quaternion
-// 
-inline vec_float4 length( const Quat & quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( const Quat & quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat, const char * name );
-
-#endif
-
-// A set of four 3x3 matrices in structure-of-arrays format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( const Quat & unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( vec_float4 scalar );
-
-    // Replicate an AoS 3x3 matrix
-    // 
-    inline Matrix3( const Aos::Matrix3 & mat );
-
-    // Insert four AoS 3x3 matrices
-    // 
-    inline Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 );
-
-    // Extract four AoS 3x3 matrices
-    // 
-    inline void get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const;
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( const Vector3 & col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, const Vector3 & vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( vec_float4 scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( const Vector3 & scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline vec_float4 determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A set of four 4x4 matrices in structure-of-arrays format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( vec_float4 scalar );
-
-    // Replicate an AoS 4x4 matrix
-    // 
-    inline Matrix4( const Aos::Matrix4 & mat );
-
-    // Insert four AoS 4x4 matrices
-    // 
-    inline Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 );
-
-    // Extract four AoS 4x4 matrices
-    // 
-    inline void get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const;
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( const Vector4 & col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( const Vector4 & col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( const Vector4 & col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( const Vector4 & col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, const Vector4 & vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( vec_float4 scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( const Vector4 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( const Quat & unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( const Vector3 & scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( const Vector3 & translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline vec_float4 determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A set of four 3x4 transformation matrices in structure-of-arrays format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( vec_float4 scalar );
-
-    // Replicate an AoS 3x4 transformation matrix
-    // 
-    inline Transform3( const Aos::Transform3 & tfrm );
-
-    // Insert four AoS 3x4 transformation matrices
-    // 
-    inline Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 );
-
-    // Extract four AoS 3x4 transformation matrices
-    // 
-    inline void get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const;
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( const Vector3 & col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( const Vector3 & col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( const Vector3 & scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( const Vector3 & translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#include "vec_soa.h"
-#include "quat_soa.h"
-#include "mat_soa.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_CPP_PPU_H
+#define _VECTORMATH_SOA_CPP_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vectormath_aos.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Soa {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+// (the SoA classes below name one another in signatures before they are defined)
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A set of four 3-D vectors in structure-of-arrays format
+// (one vec_float4 per component, each holding that component for all four vectors)
+class Vector3
+{
+    typedef vec_float4 vec_float4_t;  // used in the return type of the non-const operator []
+    vec_float4 mX;  // x components
+    vec_float4 mY;  // y components
+    vec_float4 mZ;  // z components
+
+public:
+    // Default constructor; does no initialization
+    // (mX, mY and mZ are left uninitialized; assign them before reading)
+    inline Vector3( ) { }
+
+    // Copy a 3-D vector
+    // 
+    inline Vector3( const Vector3 & vec );
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( vec_float4 x, vec_float4 y, vec_float4 z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // (explicit: a Point3 never converts to a Vector3 silently)
+    explicit inline Vector3( const Point3 & pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // (explicit: a vec_float4 never converts to a Vector3 silently)
+    explicit inline Vector3( vec_float4 scalar );
+
+    // Replicate an AoS 3-D vector
+    // (not marked explicit, so an Aos::Vector3 converts implicitly)
+    inline Vector3( Aos::Vector3 vec );
+
+    // Insert four AoS 3-D vectors
+    // 
+    inline Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 );
+
+    // Extract four AoS 3-D vectors
+    // (results are written to the four reference arguments; *this is not modified)
+    inline void get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const;
+
+    // Assign one 3-D vector to another
+    // 
+    inline Vector3 & operator =( const Vector3 & vec );
+
+    // Set the x element of a 3-D vector
+    // 
+    inline Vector3 & setX( vec_float4 x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( vec_float4 y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( vec_float4 z );
+
+    // Get the x element of a 3-D vector
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // (idx is presumably 0, 1, 2 for x, y, z -- confirm against the definition)
+    inline Vector3 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // (idx is presumably 0, 1, 2 for x, y, z -- confirm against the definition)
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // (returns a reference, so the element can be assigned through it)
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // (const overload; returns the element by value)
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    inline const Vector3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( const Vector3 & vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // (the result is a Point3, not a Vector3)
+    inline const Point3 operator +( const Point3 & pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( vec_float4 scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Vector3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( const Vector3 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a 3-D vector
+    // 
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // 
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // 
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// (scalar-on-the-left overload; the member operator * takes the scalar on the right)
+inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+// (presumably magnitude from vec0 and sign from vec1 -- confirm against the definition)
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+// (returns a vec_float4; presumably one result per SoA slot -- confirm)
+inline vec_float4 maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+// (returns a vec_float4; presumably one result per SoA slot -- confirm)
+inline vec_float4 minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline vec_float4 sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline vec_float4 lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+// 
+inline vec_float4 length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+// (the result is a Matrix3)
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// 
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// select1 is a vec_uint4 mask; presumably vec1 is taken where it is set -- confirm.
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// (the result is written to vec; threeQuads is only read)
+inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads );
+
+// Store four slots of an SoA 3-D vector in three quadwords
+// (vec is only read; the output goes through threeQuads)
+inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads );
+
+// Store eight slots of two SoA 3-D vectors as half-floats
+// (vec0 and vec1 are only read; the output goes through threeQuads)
+inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A set of four 4-D vectors in structure-of-arrays format
+// (one vec_float4 per component, each holding that component for all four vectors)
+class Vector4
+{
+    typedef vec_float4 vec_float4_t;  // used in the return type of the non-const operator []
+    vec_float4 mX;  // x components
+    vec_float4 mY;  // y components
+    vec_float4 mZ;  // z components
+    vec_float4 mW;  // w components
+
+public:
+    // Default constructor; does no initialization
+    // (mX, mY, mZ and mW are left uninitialized; assign them before reading)
+    inline Vector4( ) { }
+
+    // Copy a 4-D vector
+    // 
+    inline Vector4( const Vector4 & vec );
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // 
+    inline Vector4( const Vector3 & xyz, vec_float4 w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // (explicit: a Vector3 never converts to a Vector4 silently)
+    explicit inline Vector4( const Vector3 & vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // (explicit: a Point3 never converts to a Vector4 silently)
+    explicit inline Vector4( const Point3 & pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // (explicit: a Quat never converts to a Vector4 silently)
+    explicit inline Vector4( const Quat & quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // (explicit: a vec_float4 never converts to a Vector4 silently)
+    explicit inline Vector4( vec_float4 scalar );
+
+    // Replicate an AoS 4-D vector
+    // (not marked explicit, so an Aos::Vector4 converts implicitly)
+    inline Vector4( Aos::Vector4 vec );
+
+    // Insert four AoS 4-D vectors
+    // 
+    inline Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 );
+
+    // Extract four AoS 4-D vectors
+    // (results are written to the four reference arguments; *this is not modified)
+    inline void get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const;
+
+    // Assign one 4-D vector to another
+    // 
+    inline Vector4 & operator =( const Vector4 & vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // (returned by value as a Vector3)
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // 
+    inline Vector4 & setX( vec_float4 x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( vec_float4 y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( vec_float4 z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( vec_float4 w );
+
+    // Get the x element of a 4-D vector
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline vec_float4 getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // (idx is presumably 0, 1, 2, 3 for x, y, z, w -- confirm against the definition)
+    inline Vector4 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // (idx is presumably 0, 1, 2, 3 for x, y, z, w -- confirm against the definition)
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // (returns a reference, so the element can be assigned through it)
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // (const overload; returns the element by value)
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    inline const Vector4 operator +( const Vector4 & vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( const Vector4 & vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator *( vec_float4 scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    inline Vector4 & operator +=( const Vector4 & vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( const Vector4 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a 4-D vector
+    // 
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // 
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // 
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // 
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// (scalar-on-the-left overload; the member operator * takes the scalar on the right)
+inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+// (presumably magnitude from vec0 and sign from vec1 -- confirm against the definition)
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+// (returns a vec_float4; presumably one result per SoA slot -- confirm)
+inline vec_float4 maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+// (returns a vec_float4; presumably one result per SoA slot -- confirm)
+inline vec_float4 minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline vec_float4 sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline vec_float4 lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+// 
+inline vec_float4 length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+// (the result is a Matrix4)
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// select1 is a vec_uint4 mask; presumably vec1 is taken where it is set -- confirm.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 );
+
+// Store four slots of an SoA 4-D vector as half-floats
+// (vec is only read; the output goes through twoQuads)
+inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A set of four 3-D points in structure-of-arrays format
+// (one vec_float4 per component, each holding that component for all four points)
+class Point3
+{
+    typedef vec_float4 vec_float4_t;  // used in the return type of the non-const operator []
+    vec_float4 mX;  // x components
+    vec_float4 mY;  // y components
+    vec_float4 mZ;  // z components
+
+public:
+    // Default constructor; does no initialization
+    // (mX, mY and mZ are left uninitialized; assign them before reading)
+    inline Point3( ) { }
+
+    // Copy a 3-D point
+    // 
+    inline Point3( const Point3 & pnt );
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( vec_float4 x, vec_float4 y, vec_float4 z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // (explicit: a Vector3 never converts to a Point3 silently)
+    explicit inline Point3( const Vector3 & vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // (explicit: a vec_float4 never converts to a Point3 silently)
+    explicit inline Point3( vec_float4 scalar );
+
+    // Replicate an AoS 3-D point
+    // (not marked explicit, so an Aos::Point3 converts implicitly)
+    inline Point3( Aos::Point3 pnt );
+
+    // Insert four AoS 3-D points
+    // 
+    inline Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 );
+
+    // Extract four AoS 3-D points
+    // (results are written to the four reference arguments; *this is not modified)
+    inline void get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const;
+
+    // Assign one 3-D point to another
+    // 
+    inline Point3 & operator =( const Point3 & pnt );
+
+    // Set the x element of a 3-D point
+    // 
+    inline Point3 & setX( vec_float4 x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( vec_float4 y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( vec_float4 z );
+
+    // Get the x element of a 3-D point
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // (idx is presumably 0, 1, 2 for x, y, z -- confirm against the definition)
+    inline Point3 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // (idx is presumably 0, 1, 2 for x, y, z -- confirm against the definition)
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // (returns a reference, so the element can be assigned through it)
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // (const overload; returns the element by value)
+    inline vec_float4 operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // (the result is a Vector3, not a Point3)
+    inline const Vector3 operator -( const Point3 & pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // (the result is a Point3)
+    inline const Point3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // (the result is a Point3)
+    inline const Point3 operator -( const Vector3 & vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Point3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( const Vector3 & vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+// (presumably magnitude from pnt0 and sign from pnt1 -- confirm against the definition)
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+// (returns a vec_float4; presumably one result per SoA slot -- confirm)
+inline vec_float4 maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+// (returns a vec_float4; presumably one result per SoA slot -- confirm)
+inline vec_float4 minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline vec_float4 sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+// (overload taking a single vec_float4 scale value)
+inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// (overload taking a Vector3 of scale factors)
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline vec_float4 distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline vec_float4 distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// select1 is a vec_uint4 mask; presumably pnt1 is taken where it is set -- confirm.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 );
+
+// Load four three-float 3-D points, stored in three quadwords
+// (the result is written to pnt; threeQuads is only read)
+inline void loadXYZArray( Point3 & pnt, const vec_float4 * threeQuads );
+
+// Store four slots of an SoA 3-D point in three quadwords
+// (pnt is only read; the output goes through threeQuads)
+inline void storeXYZArray( const Point3 & pnt, vec_float4 * threeQuads );
+
+// Store eight slots of two SoA 3-D points as half-floats
+// (pnt0 and pnt1 are only read; the output goes through threeQuads)
+inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A set of four quaternions in structure-of-arrays format
+//
+class Quat
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+    vec_float4 mW;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Quat( ) { };
+
+    // Copy a quaternion
+    // 
+    inline Quat( const Quat & quat );
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // 
+    inline Quat( const Vector3 & xyz, vec_float4 w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // 
+    explicit inline Quat( const Vector4 & vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // 
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // 
+    explicit inline Quat( vec_float4 scalar );
+
+    // Replicate an AoS quaternion
+    // 
+    inline Quat( Aos::Quat quat );
+
+    // Insert four AoS quaternions
+    // 
+    inline Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 );
+
+    // Extract four AoS quaternions
+    // 
+    inline void get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const;
+
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( const Quat & quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // 
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( vec_float4 x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( vec_float4 y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( vec_float4 z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( vec_float4 w );
+
+    // Get the x element of a quaternion
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline vec_float4 getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // 
+    inline Quat & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // 
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // 
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( const Quat & quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( const Quat & quat ) const;
+
+    // Multiply two quaternions
+    // 
+    inline const Quat operator *( const Quat & quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( vec_float4 scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( const Quat & quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // 
+    static inline const Quat rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( vec_float4 radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( vec_float4 radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( vec_float4 radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// 
+inline const Quat operator *( vec_float4 scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+// 
+inline vec_float4 dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+// 
+inline vec_float4 norm( const Quat & quat );
+
+// Compute the length of a quaternion
+// 
+inline vec_float4 length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A set of four 3x3 matrices in structure-of-arrays format
+//
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // 
+    explicit inline Matrix3( const Quat & unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // 
+    explicit inline Matrix3( vec_float4 scalar );
+
+    // Replicate an AoS 3x3 matrix
+    // 
+    inline Matrix3( const Aos::Matrix3 & mat );
+
+    // Insert four AoS 3x3 matrices
+    // 
+    inline Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 );
+
+    // Extract four AoS 3x3 matrices
+    // 
+    inline void get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const;
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol2( const Vector3 & col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // 
+    inline Matrix3 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // 
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( vec_float4 scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix3 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // 
+    static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// 
+inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// 
+inline vec_float4 determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A set of four 4x4 matrices in structure-of-arrays format
+//
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // 
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // 
+    explicit inline Matrix4( vec_float4 scalar );
+
+    // Replicate an AoS 4x4 matrix
+    // 
+    inline Matrix4( const Aos::Matrix4 & mat );
+
+    // Insert four AoS 4x4 matrices
+    // 
+    inline Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 );
+
+    // Extract four AoS 4x4 matrices
+    // 
+    inline void get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const;
+
+    // Assign one 4x4 matrix to another
+    // 
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix of a 4x4 matrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set the translation component of a 4x4 matrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol0( const Vector4 & col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol1( const Vector4 & col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol2( const Vector4 & col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol3( const Vector4 & col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // 
+    inline Matrix4 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // 
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( vec_float4 scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    inline const Vector4 operator *( const Vector4 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // 
+    inline const Vector4 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // 
+    inline const Vector4 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // 
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // 
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix4 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 4x4 rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix4 rotation( const Quat & unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( const Vector3 & translateVec );
+
+    // Construct a viewing matrix based on the eye position, the position looked at, and the up direction
+    // 
+    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+    // Construct a perspective projection matrix
+    // 
+    static inline const Matrix4 perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static inline const Matrix4 frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static inline const Matrix4 orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// 
+inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// 
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// 
+inline vec_float4 determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A set of four 3x4 transformation matrices in structure-of-arrays format
+//
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // 
+    explicit inline Transform3( vec_float4 scalar );
+
+    // Replicate an AoS 3x4 transformation matrix
+    // 
+    inline Transform3( const Aos::Transform3 & tfrm );
+
+    // Insert four AoS 3x4 transformation matrices
+    // 
+    inline Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 );
+
+    // Extract four AoS 3x4 transformation matrices
+    // 
+    inline void get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const;
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set the translation component of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( const Vector3 & col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( const Vector3 & col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline Transform3 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // 
+    inline const Point3 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 3x4 rotation transformation matrix from a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( const Vector3 & scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#include "vec_soa.h"
+#include "quat_soa.h"
+#include "mat_soa.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos.h
index 01380b52e..4e0d6ee5c 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos.h
@@ -1,1452 +1,1452 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_C_H
-#define _VECTORMATH_MAT_AOS_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( &result->col0, &mat->col0 );
-    vmathV3Copy( &result->col1, &mat->col1 );
-    vmathV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat->x;
-    qy = unitQuat->y;
-    qz = unitQuat->z;
-    qw = unitQuat->w;
-    qx2 = ( qx + qx );
-    qy2 = ( qy + qy );
-    qz2 = ( qz + qz );
-    qxqx2 = ( qx * qx2 );
-    qxqy2 = ( qx * qy2 );
-    qxqz2 = ( qx * qz2 );
-    qxqw2 = ( qw * qx2 );
-    qyqy2 = ( qy * qy2 );
-    qyqz2 = ( qy * qz2 );
-    qyqw2 = ( qw * qy2 );
-    qzqz2 = ( qz * qz2 );
-    qzqw2 = ( qw * qz2 );
-    vmathV3MakeFromElems( &result->col0, ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
-    vmathV3MakeFromElems( &result->col1, ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
-    vmathV3MakeFromElems( &result->col2, ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
-}
-
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, mat, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
-{
-    vmathV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
-{
-    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    VmathMatrix3 tmpResult;
-    vmathV3MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x );
-    vmathV3MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y );
-    vmathV3MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z );
-    vmathM3Copy( result, &tmpResult );
-}
-
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    VmathVector3 tmp0, tmp1, tmp2;
-    float detinv;
-    vmathV3Cross( &tmp0, &mat->col1, &mat->col2 );
-    vmathV3Cross( &tmp1, &mat->col2, &mat->col0 );
-    vmathV3Cross( &tmp2, &mat->col0, &mat->col1 );
-    detinv = ( 1.0f / vmathV3Dot( &mat->col2, &tmp2 ) );
-    vmathV3MakeFromElems( &result->col0, ( tmp0.x * detinv ), ( tmp1.x * detinv ), ( tmp2.x * detinv ) );
-    vmathV3MakeFromElems( &result->col1, ( tmp0.y * detinv ), ( tmp1.y * detinv ), ( tmp2.y * detinv ) );
-    vmathV3MakeFromElems( &result->col2, ( tmp0.z * detinv ), ( tmp1.z * detinv ), ( tmp2.z * detinv ) );
-}
-
-static inline float vmathM3Determinant( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Neg( &result->col0, &mat->col0 );
-    vmathV3Neg( &result->col1, &mat->col1 );
-    vmathV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) );
-    tmpY = ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) );
-    tmpZ = ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) );
-    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    VmathMatrix3 tmpResult;
-    vmathM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM3Copy( result, &tmpResult );
-}
-
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeFromElems( &result->col1, 0.0f, c, s );
-    vmathV3MakeFromElems( &result->col2, 0.0f, -s, c );
-}
-
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeFromElems( &result->col0, c, 0.0f, -s );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeFromElems( &result->col2, s, 0.0f, c );
-}
-
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeFromElems( &result->col0, c, s, 0.0f );
-    vmathV3MakeFromElems( &result->col1, -s, c, 0.0f );
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ->x );
-    cX = cosf( radiansXYZ->x );
-    sY = sinf( radiansXYZ->y );
-    cY = cosf( radiansXYZ->y );
-    sZ = sinf( radiansXYZ->z );
-    cZ = cosf( radiansXYZ->z );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    vmathV3MakeFromElems( &result->col0, ( cZ * cY ), ( sZ * cY ), -sY );
-    vmathV3MakeFromElems( &result->col1, ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) );
-    vmathV3MakeFromElems( &result->col2, ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) );
-}
-
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    vmathV3MakeFromElems( &result->col0, ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) );
-    vmathV3MakeFromElems( &result->col1, ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) );
-    vmathV3MakeFromElems( &result->col2, ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) );
-}
-
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vmathM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
-{
-    vmathV3MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f );
-    vmathV3MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f );
-    vmathV3MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z );
-}
-
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-}
-
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
-{
-    vmathV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathM3GetRow( &tmpV3_0, mat, 0 );
-    vmathV3Print( &tmpV3_0 );
-    vmathM3GetRow( &tmpV3_1, mat, 1 );
-    vmathV3Print( &tmpV3_1 );
-    vmathM3GetRow( &tmpV3_2, mat, 2 );
-    vmathV3Print( &tmpV3_2 );
-}
-
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM3Print( mat );
-}
-
-#endif
-
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( &result->col0, &mat->col0 );
-    vmathV4Copy( &result->col1, &mat->col1 );
-    vmathV4Copy( &result->col2, &mat->col2 );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
-{
-    vmathV4MakeFromScalar( &result->col0, scalar );
-    vmathV4MakeFromScalar( &result->col1, scalar );
-    vmathV4MakeFromScalar( &result->col2, scalar );
-    vmathV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, 1.0f );
-}
-
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-    vmathV4Copy( &result->col1, _col1 );
-    vmathV4Copy( &result->col2, _col2 );
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 mat;
-    vmathM3MakeFromQ( &mat, unitQuat );
-    vmathV4MakeFromV3Scalar( &result->col0, &mat.col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat.col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat.col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
-{
-    vmathV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
-{
-    vmathV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
-{
-    vmathV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
-{
-    vmathV4SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV4SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV4SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV4SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
-{
-    VmathVector4 tmpV3_0;
-    vmathM4GetCol( &tmpV3_0, result, col );
-    vmathV4SetElem( &tmpV3_0, row, val );
-    vmathM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
-{
-    VmathVector4 tmpV4_0;
-    vmathM4GetCol( &tmpV4_0, mat, col );
-    return vmathV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
-{
-    vmathV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
-{
-    vmathV4MakeFromElems( result, vmathV4GetElem( &mat->col0, row ), vmathV4GetElem( &mat->col1, row ), vmathV4GetElem( &mat->col2, row ), vmathV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathMatrix4 tmpResult;
-    vmathV4MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
-    vmathV4MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
-    vmathV4MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
-    vmathV4MakeFromElems( &tmpResult.col3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathVector4 res0, res1, res2, res3;
-    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    vmathV4SetX( &res0, ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
-    vmathV4SetY( &res0, ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
-    vmathV4SetZ( &res0, ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
-    vmathV4SetW( &res0, ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
-    detInv = ( 1.0f / ( ( ( ( mA * res0.x ) + ( mE * res0.y ) ) + ( mI * res0.z ) ) + ( mM * res0.w ) ) );
-    vmathV4SetX( &res1, ( mI * tmp1 ) );
-    vmathV4SetY( &res1, ( mM * tmp0 ) );
-    vmathV4SetZ( &res1, ( mA * tmp1 ) );
-    vmathV4SetW( &res1, ( mE * tmp0 ) );
-    vmathV4SetX( &res3, ( mI * tmp3 ) );
-    vmathV4SetY( &res3, ( mM * tmp2 ) );
-    vmathV4SetZ( &res3, ( mA * tmp3 ) );
-    vmathV4SetW( &res3, ( mE * tmp2 ) );
-    vmathV4SetX( &res2, ( mI * tmp5 ) );
-    vmathV4SetY( &res2, ( mM * tmp4 ) );
-    vmathV4SetZ( &res2, ( mA * tmp5 ) );
-    vmathV4SetW( &res2, ( mE * tmp4 ) );
-    tmp0 = ( ( mI * mB ) - ( mA * mJ ) );
-    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
-    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
-    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
-    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
-    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
-    vmathV4SetX( &res2, ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.x ) );
-    vmathV4SetY( &res2, ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.y ) );
-    vmathV4SetZ( &res2, ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.z ) );
-    vmathV4SetW( &res2, ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.w ) );
-    vmathV4SetX( &res3, ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.x ) );
-    vmathV4SetY( &res3, ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.y ) );
-    vmathV4SetZ( &res3, ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.z ) );
-    vmathV4SetW( &res3, ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.w ) );
-    vmathV4SetX( &res1, ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.x ) );
-    vmathV4SetY( &res1, ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.y ) );
-    vmathV4SetZ( &res1, ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.z ) );
-    vmathV4SetW( &res1, ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.w ) );
-    vmathV4ScalarMul( &result->col0, &res0, detInv );
-    vmathV4ScalarMul( &result->col1, &res1, detInv );
-    vmathV4ScalarMul( &result->col2, &res2, detInv );
-    vmathV4ScalarMul( &result->col3, &res3, detInv );
-}
-
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3Inverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline float vmathM4Determinant( const VmathMatrix4 *mat )
-{
-    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) );
-    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
-    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
-    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
-    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
-}
-
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Neg( &result->col0, &mat->col0 );
-    vmathV4Neg( &result->col1, &mat->col1 );
-    vmathV4Neg( &result->col2, &mat->col2 );
-    vmathV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) ) + ( mat->col3.x * vec->w ) );
-    tmpY = ( ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) ) + ( mat->col3.y * vec->w ) );
-    tmpZ = ( ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) ) + ( mat->col3.z * vec->w ) );
-    tmpW = ( ( ( ( mat->col0.w * vec->x ) + ( mat->col1.w * vec->y ) ) + ( mat->col2.w * vec->z ) ) + ( mat->col3.w * vec->w ) );
-    vmathV4MakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
-{
-    result->x = ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) );
-    result->y = ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) );
-    result->z = ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) );
-    result->w = ( ( ( mat->col0.w * vec->x ) + ( mat->col1.w * vec->y ) ) + ( mat->col2.w * vec->z ) );
-}
-
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
-{
-    result->x = ( ( ( ( mat->col0.x * pnt->x ) + ( mat->col1.x * pnt->y ) ) + ( mat->col2.x * pnt->z ) ) + mat->col3.x );
-    result->y = ( ( ( ( mat->col0.y * pnt->x ) + ( mat->col1.y * pnt->y ) ) + ( mat->col2.y * pnt->z ) ) + mat->col3.y );
-    result->z = ( ( ( ( mat->col0.z * pnt->x ) + ( mat->col1.z * pnt->y ) ) + ( mat->col2.z * pnt->z ) ) + mat->col3.z );
-    result->w = ( ( ( ( mat->col0.w * pnt->x ) + ( mat->col1.w * pnt->y ) ) + ( mat->col2.w * pnt->z ) ) + mat->col3.w );
-}
-
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    VmathMatrix4 tmpResult;
-    vmathM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
-{
-    VmathMatrix4 tmpResult;
-    VmathPoint3 tmpP3_0;
-    vmathM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
-{
-    vmathV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( &result->col0, &mat->col0 );
-    vmathV4GetXYZ( &result->col1, &mat->col1 );
-    vmathV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeFromElems( &result->col1, 0.0f, c, s, 0.0f );
-    vmathV4MakeFromElems( &result->col2, 0.0f, -s, c, 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV4MakeFromElems( &result->col0, c, 0.0f, -s, 0.0f );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeFromElems( &result->col2, s, 0.0f, c, 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV4MakeFromElems( &result->col0, c, s, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, -s, c, 0.0f, 0.0f );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ->x );
-    cX = cosf( radiansXYZ->x );
-    sY = sinf( radiansXYZ->y );
-    cY = cosf( radiansXYZ->y );
-    sZ = sinf( radiansXYZ->z );
-    cZ = cosf( radiansXYZ->z );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    vmathV4MakeFromElems( &result->col0, ( cZ * cY ), ( sZ * cY ), -sY, 0.0f );
-    vmathV4MakeFromElems( &result->col1, ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ), 0.0f );
-    vmathV4MakeFromElems( &result->col2, ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ), 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    vmathV4MakeFromElems( &result->col0, ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ), 0.0f );
-    vmathV4MakeFromElems( &result->col1, ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ), 0.0f );
-    vmathV4MakeFromElems( &result->col2, ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ), 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
-{
-    VmathTransform3 tmpT3_0;
-    vmathT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
-{
-    vmathV4MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z, 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV4ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV4ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
-{
-    VmathVector4 scale4;
-    vmathV4MakeFromV3Scalar( &scale4, scaleVec, 1.0f );
-    vmathV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
-{
-    VmathMatrix4 m4EyeFrame;
-    VmathVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathV3Normalize( &v3Y, upVec );
-    vmathP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathV3Normalize( &v3Z, &tmpV3_0 );
-    vmathV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathV3Normalize( &v3X, &tmpV3_1 );
-    vmathV3Cross( &v3Y, &v3Z, &v3X );
-    vmathV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    f = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
-    rangeInv = ( 1.0f / ( zNear - zFar ) );
-    vmathV4MakeFromElems( &result->col0, ( f / aspect ), 0.0f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, 0.0f, f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f );
-    vmathV4MakeFromElems( &result->col3, 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f );
-}
-
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    n2 = ( zNear + zNear );
-    vmathV4MakeFromElems( &result->col0, ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col2, ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f );
-    vmathV4MakeFromElems( &result->col3, 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f );
-}
-
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    vmathV4MakeFromElems( &result->col0, ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f );
-    vmathV4MakeFromElems( &result->col3, ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f );
-}
-
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
-{
-    vmathV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print( const VmathMatrix4 *mat )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathM4GetRow( &tmpV4_0, mat, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathM4GetRow( &tmpV4_1, mat, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathM4GetRow( &tmpV4_2, mat, 2 );
-    vmathV4Print( &tmpV4_2 );
-    vmathM4GetRow( &tmpV4_3, mat, 3 );
-    vmathV4Print( &tmpV4_3 );
-}
-
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM4Print( mat );
-}
-
-#endif
-
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-    vmathV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
-{
-    vmathT3SetUpper3x3( result, tfrm );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 tmpM3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathT3SetUpper3x3( result, &tmpM3_0 );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV3SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
-{
-    vmathV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
-{
-    vmathV4MakeFromElems( result, vmathV3GetElem( &tfrm->col0, row ), vmathV3GetElem( &tfrm->col1, row ), vmathV3GetElem( &tfrm->col2, row ), vmathV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    VmathVector3 tmp0, tmp1, tmp2, inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    float detinv;
-    vmathV3Cross( &tmp0, &tfrm->col1, &tfrm->col2 );
-    vmathV3Cross( &tmp1, &tfrm->col2, &tfrm->col0 );
-    vmathV3Cross( &tmp2, &tfrm->col0, &tfrm->col1 );
-    detinv = ( 1.0f / vmathV3Dot( &tfrm->col2, &tmp2 ) );
-    vmathV3MakeFromElems( &inv0, ( tmp0.x * detinv ), ( tmp1.x * detinv ), ( tmp2.x * detinv ) );
-    vmathV3MakeFromElems( &inv1, ( tmp0.y * detinv ), ( tmp1.y * detinv ), ( tmp2.y * detinv ) );
-    vmathV3MakeFromElems( &inv2, ( tmp0.z * detinv ), ( tmp1.z * detinv ), ( tmp2.z * detinv ) );
-    vmathV3Copy( &result->col0, &inv0 );
-    vmathV3Copy( &result->col1, &inv1 );
-    vmathV3Copy( &result->col2, &inv2 );
-    vmathV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    VmathVector3 inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vmathV3MakeFromElems( &inv0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
-    vmathV3MakeFromElems( &inv1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
-    vmathV3MakeFromElems( &inv2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
-    vmathV3Copy( &result->col0, &inv0 );
-    vmathV3Copy( &result->col1, &inv1 );
-    vmathV3Copy( &result->col2, &inv2 );
-    vmathV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( ( tfrm->col0.x * vec->x ) + ( tfrm->col1.x * vec->y ) ) + ( tfrm->col2.x * vec->z ) );
-    tmpY = ( ( ( tfrm->col0.y * vec->x ) + ( tfrm->col1.y * vec->y ) ) + ( tfrm->col2.y * vec->z ) );
-    tmpZ = ( ( ( tfrm->col0.z * vec->x ) + ( tfrm->col1.z * vec->y ) ) + ( tfrm->col2.z * vec->z ) );
-    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( ( ( tfrm->col0.x * pnt->x ) + ( tfrm->col1.x * pnt->y ) ) + ( tfrm->col2.x * pnt->z ) ) + tfrm->col3.x );
-    tmpY = ( ( ( ( tfrm->col0.y * pnt->x ) + ( tfrm->col1.y * pnt->y ) ) + ( tfrm->col2.y * pnt->z ) ) + tfrm->col3.y );
-    tmpZ = ( ( ( ( tfrm->col0.z * pnt->x ) + ( tfrm->col1.z * pnt->y ) ) + ( tfrm->col2.z * pnt->z ) ) + tfrm->col3.z );
-    vmathP3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    VmathTransform3 tmpResult;
-    VmathPoint3 tmpP3_0, tmpP3_1;
-    vmathT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathT3Copy( result, &tmpResult );
-}
-
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathT3MakeIdentity( VmathTransform3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
-{
-    vmathM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeFromElems( &result->col1, 0.0f, c, s );
-    vmathV3MakeFromElems( &result->col2, 0.0f, -s, c );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeFromElems( &result->col0, c, 0.0f, -s );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeFromElems( &result->col2, s, 0.0f, c );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeFromElems( &result->col0, c, s, 0.0f );
-    vmathV3MakeFromElems( &result->col1, -s, c, 0.0f );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ->x );
-    cX = cosf( radiansXYZ->x );
-    sY = sinf( radiansXYZ->y );
-    cY = cosf( radiansXYZ->y );
-    sZ = sinf( radiansXYZ->z );
-    cZ = cosf( radiansXYZ->z );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    vmathV3MakeFromElems( &result->col0, ( cZ * cY ), ( sZ * cY ), -sY );
-    vmathV3MakeFromElems( &result->col1, ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) );
-    vmathV3MakeFromElems( &result->col2, ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
-{
-    vmathV3MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f );
-    vmathV3MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f );
-    vmathV3MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &tfrm->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &tfrm->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &tfrm->col2, vmathV3GetZ( scaleVec ) );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print( const VmathTransform3 *tfrm )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;
-    vmathT3GetRow( &tmpV4_0, tfrm, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathT3GetRow( &tmpV4_1, tfrm, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathT3GetRow( &tmpV4_2, tfrm, 2 );
-    vmathV4Print( &tmpV4_2 );
-}
-
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
-{
-    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    int negTrace, ZgtX, ZgtY, YgtX;
-    int largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm->col0.x;
-    yx = tfrm->col0.y;
-    zx = tfrm->col0.z;
-    xy = tfrm->col1.x;
-    yy = tfrm->col1.y;
-    zy = tfrm->col1.z;
-    xz = tfrm->col2.x;
-    yz = tfrm->col2.y;
-    zz = tfrm->col2.z;
-
-    trace = ( ( xx + yy ) + zz );
-
-    negTrace = ( trace < 0.0f );
-    ZgtX = zz > xx;
-    ZgtY = zz > yy;
-    YgtX = yy > xx;
-    largestXorY = ( !ZgtX || !ZgtY ) && negTrace;
-    largestYorZ = ( YgtX || ZgtX ) && negTrace;
-    largestZorX = ( ZgtY || !YgtX ) && negTrace;
-    
-    if ( largestXorY )
-    {
-        zz = -zz;
-        xy = -xy;
-    }
-    if ( largestYorZ )
-    {
-        xx = -xx;
-        yz = -yz;
-    }
-    if ( largestZorX )
-    {
-        yy = -yy;
-        zx = -zx;
-    }
-
-    radicand = ( ( ( xx + yy ) + zz ) + 1.0f );
-    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
-
-    tmpx = ( ( zy - yz ) * scale );
-    tmpy = ( ( xz - zx ) * scale );
-    tmpz = ( ( yx - xy ) * scale );
-    tmpw = ( radicand * scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    if ( largestXorY )
-    {
-        qx = tmpw;
-        qy = tmpz;
-        qz = tmpy;
-        qw = tmpx;
-    }
-    if ( largestYorZ )
-    {
-        tmpx = qx;
-        tmpz = qz;
-        qx = qy;
-        qy = tmpx;
-        qz = qw;
-        qw = tmpz;
-    }
-
-    result->x = qx;
-    result->y = qy;
-    result->z = qz;
-    result->w = qw;
-}
-
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
-{
-    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
-    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
-    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
-{
-    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
-    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
-    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
-    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
-}
-
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( ( vec->x * mat->col0.x ) + ( vec->y * mat->col0.y ) ) + ( vec->z * mat->col0.z ) );
-    tmpY = ( ( ( vec->x * mat->col1.x ) + ( vec->y * mat->col1.y ) ) + ( vec->z * mat->col1.z ) );
-    tmpZ = ( ( ( vec->x * mat->col2.x ) + ( vec->y * mat->col2.y ) ) + ( vec->z * mat->col2.z ) );
-    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
-{
-    vmathV3MakeFromElems( &result->col0, 0.0f, vec->z, -vec->y );
-    vmathV3MakeFromElems( &result->col1, -vec->z, 0.0f, vec->x );
-    vmathV3MakeFromElems( &result->col2, vec->y, -vec->x, 0.0f );
-}
-
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_C_H
+#define _VECTORMATH_MAT_AOS_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( &result->col0, &mat->col0 );
+    vmathV3Copy( &result->col1, &mat->col1 );
+    vmathV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
+{
+    vmathV3MakeFromScalar( &result->col0, scalar );
+    vmathV3MakeFromScalar( &result->col1, scalar );
+    vmathV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{
+    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat->x;
+    qy = unitQuat->y;
+    qz = unitQuat->z;
+    qw = unitQuat->w;
+    qx2 = ( qx + qx );
+    qy2 = ( qy + qy );
+    qz2 = ( qz + qz );
+    qxqx2 = ( qx * qx2 );
+    qxqy2 = ( qx * qy2 );
+    qxqz2 = ( qx * qz2 );
+    qxqw2 = ( qw * qx2 );
+    qyqy2 = ( qy * qy2 );
+    qyqz2 = ( qy * qz2 );
+    qyqw2 = ( qw * qy2 );
+    qzqz2 = ( qz * qz2 );
+    qzqw2 = ( qw * qz2 );
+    vmathV3MakeFromElems( &result->col0, ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
+    vmathV3MakeFromElems( &result->col1, ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
+    vmathV3MakeFromElems( &result->col2, ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
+}
+
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
+{
+    vmathV3Copy( &result->col0, _col0 );
+    vmathV3Copy( &result->col1, _col1 );
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
+{
+    vmathV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
+{
+    vmathV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
+{
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
+{
+    vmathV3Copy( (&result->col0 + col), vec );
+}
+
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
+{
+    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
+    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
+    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
+{
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, result, col );
+    vmathV3SetElem( &tmpV3_0, row, val );
+    vmathM3SetCol( result, col, &tmpV3_0 );
+}
+
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
+{
+    VmathVector3 column; /* working copy of the addressed column */
+    vmathM3GetCol( &column, mat, col );
+    return vmathV3GetElem( &column, row ); /* element `row` of that column */
+}
+
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
+{ /* Copy column 0 of *mat into *result. */
+    vmathV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
+{ /* Copy column 1 of *mat into *result. */
+    vmathV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
+{ /* Copy column 2 of *mat into *result. */
+    vmathV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
+{ /* Copy the column selected by col into *result. The column is addressed as an offset from col0 and col is not range-checked here (presumably 0..2 - caller must ensure). */
+    vmathV3Copy( result, (&mat->col0 + col) );
+}
+
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
+{ /* Gather one row: element `row` of col0, col1 and col2 is passed, in that order, to vmathV3MakeFromElems. */
+    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
+}
+
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    VmathMatrix3 transposed; /* assembled separately and copied out last, after every read of *mat */
+    vmathV3MakeFromElems( &transposed.col0, mat->col0.x, mat->col1.x, mat->col2.x ); /* x of each column */
+    vmathV3MakeFromElems( &transposed.col1, mat->col0.y, mat->col1.y, mat->col2.y ); /* y of each column */
+    vmathV3MakeFromElems( &transposed.col2, mat->col0.z, mat->col1.z, mat->col2.z ); /* z of each column */
+    vmathM3Copy( result, &transposed );
+}
+
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    VmathVector3 cross12, cross20, cross01; /* pairwise cross products of the columns of *mat */
+    float invDet; /* reciprocal of dot( col2, col0 x col1 ); the divisor is not tested for zero */
+    vmathV3Cross( &cross12, &mat->col1, &mat->col2 );
+    vmathV3Cross( &cross20, &mat->col2, &mat->col0 );
+    vmathV3Cross( &cross01, &mat->col0, &mat->col1 );
+    invDet = ( 1.0f / vmathV3Dot( &mat->col2, &cross01 ) );
+    vmathV3MakeFromElems( &result->col0, ( cross12.x * invDet ), ( cross20.x * invDet ), ( cross01.x * invDet ) ); /* *result is first written here, after all reads of *mat */
+    vmathV3MakeFromElems( &result->col1, ( cross12.y * invDet ), ( cross20.y * invDet ), ( cross01.y * invDet ) );
+    vmathV3MakeFromElems( &result->col2, ( cross12.z * invDet ), ( cross20.z * invDet ), ( cross01.z * invDet ) );
+}
+
+static inline float vmathM3Determinant( const VmathMatrix3 *mat )
+{
+    VmathVector3 cross01; /* col0 x col1 */
+    vmathV3Cross( &cross01, &mat->col0, &mat->col1 );
+    return vmathV3Dot( &mat->col2, &cross01 ); /* dot( col2, col0 x col1 ) */
+}
+
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{ /* Column-by-column sum: each column of *result is vmathV3Add of the matching columns of *mat0 and *mat1. */
+    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{ /* Column-by-column difference: each column of *result is vmathV3Sub of the matching columns, *mat0 first. */
+    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{ /* Apply vmathV3Neg to each column of *mat, storing into the matching column of *result. */
+    vmathV3Neg( &result->col0, &mat->col0 );
+    vmathV3Neg( &result->col1, &mat->col1 );
+    vmathV3Neg( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{ /* Apply vmathV3AbsPerElem to each column of *mat, storing into the matching column of *result. */
+    vmathV3AbsPerElem( &result->col0, &mat->col0 );
+    vmathV3AbsPerElem( &result->col1, &mat->col1 );
+    vmathV3AbsPerElem( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
+{ /* Apply vmathV3ScalarMul with the same scalar to each column of *mat. */
+    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
+    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
+}
+
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
+{
+    /* Matrix * column vector. All three sums are formed before *result is written. */
+    const float outX = ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) );
+    const float outY = ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) );
+    const float outZ = ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) );
+    vmathV3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    VmathMatrix3 product; /* assembled separately and copied out last, after every read of the operands */
+    vmathM3MulV3( &product.col0, mat0, &mat1->col0 ); /* column k of the product is mat0 * column k of mat1 */
+    vmathM3MulV3( &product.col1, mat0, &mat1->col1 );
+    vmathM3MulV3( &product.col2, mat0, &mat1->col2 );
+    vmathM3Copy( result, &product );
+}
+
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{ /* Apply vmathV3MulPerElem to matching columns of *mat0 and *mat1 (not a matrix product). */
+    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
+{ /* Set the columns of *result via vmathV3MakeXAxis, vmathV3MakeYAxis and vmathV3MakeZAxis respectively. */
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    /* Column 0 comes from vmathV3MakeXAxis; columns 1 and 2 are ( 0, cos, sin ) and ( 0, -sin, cos ). */
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeFromElems( &result->col1, 0.0f, cosA, sinA );
+    vmathV3MakeFromElems( &result->col2, 0.0f, -sinA, cosA );
+}
+
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    /* Columns 0 and 2 are ( cos, 0, -sin ) and ( sin, 0, cos ); column 1 comes from vmathV3MakeYAxis. */
+    vmathV3MakeFromElems( &result->col0, cosA, 0.0f, -sinA );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeFromElems( &result->col2, sinA, 0.0f, cosA );
+}
+
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    /* Columns 0 and 1 are ( cos, sin, 0 ) and ( -sin, cos, 0 ); column 2 comes from vmathV3MakeZAxis. */
+    vmathV3MakeFromElems( &result->col0, cosA, sinA, 0.0f );
+    vmathV3MakeFromElems( &result->col1, -sinA, cosA, 0.0f );
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ->x );
+    const float cosX = cosf( radiansXYZ->x );
+    const float sinY = sinf( radiansXYZ->y );
+    const float cosY = cosf( radiansXYZ->y );
+    const float sinZ = sinf( radiansXYZ->z );
+    const float cosZ = cosf( radiansXYZ->z );
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    /* All six sines/cosines are taken before any column of *result is written. */
+    vmathV3MakeFromElems( &result->col0, ( cosZ * cosY ), ( sinZ * cosY ), -sinY );
+    vmathV3MakeFromElems( &result->col1, ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ) );
+    vmathV3MakeFromElems( &result->col2, ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ) );
+}
+
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const float ax = unitVec->x;
+    const float ay = unitVec->y;
+    const float az = unitVec->z;
+    const float axay = ( ax * ay );
+    const float ayaz = ( ay * az );
+    const float azax = ( az * ax );
+    const float oneMinusCos = ( 1.0f - cosA );
+    /* unitVec is used as given; nothing here normalizes it. */
+    vmathV3MakeFromElems( &result->col0, ( ( ( ax * ax ) * oneMinusCos ) + cosA ), ( ( axay * oneMinusCos ) + ( az * sinA ) ), ( ( azax * oneMinusCos ) - ( ay * sinA ) ) );
+    vmathV3MakeFromElems( &result->col1, ( ( axay * oneMinusCos ) - ( az * sinA ) ), ( ( ( ay * ay ) * oneMinusCos ) + cosA ), ( ( ayaz * oneMinusCos ) + ( ax * sinA ) ) );
+    vmathV3MakeFromElems( &result->col2, ( ( azax * oneMinusCos ) + ( ay * sinA ) ), ( ( ayaz * oneMinusCos ) - ( ax * sinA ) ), ( ( ( az * az ) * oneMinusCos ) + cosA ) );
+}
+
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{ /* Alias for vmathM3MakeFromQ: forwards both arguments unchanged. */
+    vmathM3MakeFromQ( result, unitQuat );
+}
+
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
+{ /* Diagonal matrix: scaleVec->x, ->y, ->z on the diagonal, 0.0f elsewhere. scaleVec is re-read after each column write. */
+    vmathV3MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f );
+    vmathV3MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f );
+    vmathV3MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z );
+}
+
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
+{ /* Scale whole columns: col0, col1, col2 of *mat are multiplied by vmathV3GetX, GetY, GetZ of *scaleVec respectively. */
+    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
+    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
+    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
+}
+
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
+{ /* Scale rows: every column of *mat is multiplied per element by *scaleVec. Note that scaleVec precedes mat in the parameter list. */
+    vmathV3MulPerElem( &result->col0, &mat->col0, scaleVec );
+    vmathV3MulPerElem( &result->col1, &mat->col1, scaleVec );
+    vmathV3MulPerElem( &result->col2, &mat->col2, scaleVec );
+}
+
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
+{ /* Apply vmathV3Select with the same select1 to each pair of matching columns. NOTE(review): the meaning of select1 is defined by vmathV3Select, which is not shown here. */
+    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
+    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
+    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM3Print( const VmathMatrix3 *mat )
+{
+    VmathVector3 row0, row1, row2; /* one scratch vector per row; rows are printed in order 0, 1, 2 */
+    vmathM3GetRow( &row0, mat, 0 );
+    vmathV3Print( &row0 );
+    vmathM3GetRow( &row1, mat, 1 );
+    vmathV3Print( &row1 );
+    vmathM3GetRow( &row2, mat, 2 );
+    vmathV3Print( &row2 );
+}
+
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
+{ /* Print name followed by a colon and newline, then the matrix via vmathM3Print. */
+    printf("%s:\n", name);
+    vmathM3Print( mat );
+}
+
+#endif
+
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{ /* Copy all four columns of *mat into *result, in the order col0..col3. */
+    vmathV4Copy( &result->col0, &mat->col0 );
+    vmathV4Copy( &result->col1, &mat->col1 );
+    vmathV4Copy( &result->col2, &mat->col2 );
+    vmathV4Copy( &result->col3, &mat->col3 );
+}
+
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
+{ /* Initialise every column with vmathV4MakeFromScalar( scalar ). */
+    vmathV4MakeFromScalar( &result->col0, scalar );
+    vmathV4MakeFromScalar( &result->col1, scalar );
+    vmathV4MakeFromScalar( &result->col2, scalar );
+    vmathV4MakeFromScalar( &result->col3, scalar );
+}
+
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
+{ /* Widen a 3x4 transform: each column goes through vmathV4MakeFromV3Scalar, with scalar 0.0f for col0..col2 and 1.0f for col3. */
+    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, 1.0f );
+}
+
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
+{ /* Fill *result from four column vectors, copied in the order col0..col3. */
+    vmathV4Copy( &result->col0, _col0 );
+    vmathV4Copy( &result->col1, _col1 );
+    vmathV4Copy( &result->col2, _col2 );
+    vmathV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
+{ /* Combine a 3x3 matrix and a translation: the 3x3 columns get scalar 0.0f, translateVec becomes col3 with scalar 1.0f. */
+    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
+}
+
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rotation; /* 3x3 matrix produced from the quaternion by vmathM3MakeFromQ */
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathV4MakeFromV3Scalar( &result->col0, &rotation.col0, 0.0f ); /* rotation columns get scalar 0.0f */
+    vmathV4MakeFromV3Scalar( &result->col1, &rotation.col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &rotation.col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f ); /* translation column gets scalar 1.0f */
+}
+
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
+{ /* Overwrite column 0 of *result with *_col0; the other columns are untouched. */
+    vmathV4Copy( &result->col0, _col0 );
+}
+
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
+{ /* Overwrite column 1 of *result with *_col1; the other columns are untouched. */
+    vmathV4Copy( &result->col1, _col1 );
+}
+
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
+{ /* Overwrite column 2 of *result with *_col2; the other columns are untouched. */
+    vmathV4Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
+{ /* Overwrite column 3 of *result with *_col3; the other columns are untouched. */
+    vmathV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
+{ /* Overwrite the column selected by col with *vec. The column is addressed as an offset from col0 and col is not range-checked here (presumably 0..3 - caller must ensure). */
+    vmathV4Copy( (&result->col0 + col), vec );
+}
+
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
+{ /* Overwrite one row: element k of *vec is stored into element `row` of column k, for k = 0..3. */
+    vmathV4SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
+    vmathV4SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
+    vmathV4SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
+    vmathV4SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
+}
+
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
+{
+    VmathVector4 column; /* working copy of the addressed 4-element column */
+    vmathM4GetCol( &column, result, col ); /* fetch the whole column */
+    vmathV4SetElem( &column, row, val ); /* patch one element in the copy */
+    vmathM4SetCol( result, col, &column ); /* store the column back */
+}
+
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
+{
+    VmathVector4 column; /* working copy of the addressed column */
+    vmathM4GetCol( &column, mat, col );
+    return vmathV4GetElem( &column, row ); /* element `row` of that column */
+}
+
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
+{ /* Copy column 0 of *mat into *result. */
+    vmathV4Copy( result, &mat->col0 );
+}
+
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
+{ /* Copy column 1 of *mat into *result. */
+    vmathV4Copy( result, &mat->col1 );
+}
+
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
+{ /* Copy column 2 of *mat into *result. */
+    vmathV4Copy( result, &mat->col2 );
+}
+
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
+{ /* Copy column 3 of *mat into *result. */
+    vmathV4Copy( result, &mat->col3 );
+}
+
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
+{ /* Copy the column selected by col into *result. The column is addressed as an offset from col0 and col is not range-checked here (presumably 0..3 - caller must ensure). */
+    vmathV4Copy( result, (&mat->col0 + col) );
+}
+
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
+{ /* Gather one row: element `row` of col0..col3 is passed, in that order, to vmathV4MakeFromElems. */
+    vmathV4MakeFromElems( result, vmathV4GetElem( &mat->col0, row ), vmathV4GetElem( &mat->col1, row ), vmathV4GetElem( &mat->col2, row ), vmathV4GetElem( &mat->col3, row ) );
+}
+
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathMatrix4 transposed; /* assembled separately and copied out last, after every read of *mat */
+    vmathV4MakeFromElems( &transposed.col0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x ); /* x of each column */
+    vmathV4MakeFromElems( &transposed.col1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y ); /* y of each column */
+    vmathV4MakeFromElems( &transposed.col2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z ); /* z of each column */
+    vmathV4MakeFromElems( &transposed.col3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w ); /* w of each column */
+    vmathM4Copy( result, &transposed );
+}
+
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{ /* General 4x4 inverse: builds four unscaled result columns from products of 2x2 terms, then scales them by 1/determinant. */
+    VmathVector4 res0, res1, res2, res3; /* unscaled columns of the result */
+    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat->col0.x; /* mA..mD = col0, mE..mH = col1, mI..mL = col2, mM..mP = col3; all sixteen are read before *result is written */
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x;
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x;
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x;
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w;
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) ); /* first set of 2x2 terms, built from the y/z/w elements only */
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    vmathV4SetX( &res0, ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) ); /* res0 is complete after these four stores */
+    vmathV4SetY( &res0, ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
+    vmathV4SetZ( &res0, ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
+    vmathV4SetW( &res0, ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
+    detInv = ( 1.0f / ( ( ( ( mA * res0.x ) + ( mE * res0.y ) ) + ( mI * res0.z ) ) + ( mM * res0.w ) ) ); /* reciprocal determinant; the divisor is not tested for zero */
+    vmathV4SetX( &res1, ( mI * tmp1 ) ); /* partial terms for res1..res3; completed below once tmp0..tmp5 are recomputed */
+    vmathV4SetY( &res1, ( mM * tmp0 ) );
+    vmathV4SetZ( &res1, ( mA * tmp1 ) );
+    vmathV4SetW( &res1, ( mE * tmp0 ) );
+    vmathV4SetX( &res3, ( mI * tmp3 ) );
+    vmathV4SetY( &res3, ( mM * tmp2 ) );
+    vmathV4SetZ( &res3, ( mA * tmp3 ) );
+    vmathV4SetW( &res3, ( mE * tmp2 ) );
+    vmathV4SetX( &res2, ( mI * tmp5 ) );
+    vmathV4SetY( &res2, ( mM * tmp4 ) );
+    vmathV4SetZ( &res2, ( mA * tmp5 ) );
+    vmathV4SetW( &res2, ( mE * tmp4 ) );
+    tmp0 = ( ( mI * mB ) - ( mA * mJ ) ); /* tmp0..tmp5 are reused for a second set of 2x2 terms */
+    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
+    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
+    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
+    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
+    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
+    vmathV4SetX( &res2, ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.x ) ); /* each store folds in the partial term saved above */
+    vmathV4SetY( &res2, ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.y ) );
+    vmathV4SetZ( &res2, ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.z ) );
+    vmathV4SetW( &res2, ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.w ) );
+    vmathV4SetX( &res3, ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.x ) );
+    vmathV4SetY( &res3, ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.y ) );
+    vmathV4SetZ( &res3, ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.z ) );
+    vmathV4SetW( &res3, ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.w ) );
+    vmathV4SetX( &res1, ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.x ) );
+    vmathV4SetY( &res1, ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.y ) );
+    vmathV4SetZ( &res1, ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.z ) );
+    vmathV4SetW( &res1, ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.w ) );
+    vmathV4ScalarMul( &result->col0, &res0, detInv ); /* *result is first written here */
+    vmathV4ScalarMul( &result->col1, &res1, detInv );
+    vmathV4ScalarMul( &result->col2, &res2, detInv );
+    vmathV4ScalarMul( &result->col3, &res3, detInv );
+}
+
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 affine, inverted; /* 3x4 view of *mat and its inverse */
+    VmathVector3 xyz0, xyz1, xyz2, xyz3; /* xyz part of each column; the w elements of *mat are not read */
+    vmathV4GetXYZ( &xyz0, &mat->col0 );
+    vmathT3SetCol0( &affine, &xyz0 );
+    vmathV4GetXYZ( &xyz1, &mat->col1 );
+    vmathT3SetCol1( &affine, &xyz1 );
+    vmathV4GetXYZ( &xyz2, &mat->col2 );
+    vmathT3SetCol2( &affine, &xyz2 );
+    vmathV4GetXYZ( &xyz3, &mat->col3 );
+    vmathT3SetCol3( &affine, &xyz3 );
+    vmathT3Inverse( &inverted, &affine ); /* the inversion itself is delegated to the 3x4 routine */
+    vmathM4MakeFromT3( result, &inverted ); /* widen back to 4x4 */
+}
+
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 affine, inverted; /* 3x4 view of *mat and its inverse */
+    VmathVector3 xyz0, xyz1, xyz2, xyz3; /* xyz part of each column; the w elements of *mat are not read */
+    vmathV4GetXYZ( &xyz0, &mat->col0 );
+    vmathT3SetCol0( &affine, &xyz0 );
+    vmathV4GetXYZ( &xyz1, &mat->col1 );
+    vmathT3SetCol1( &affine, &xyz1 );
+    vmathV4GetXYZ( &xyz2, &mat->col2 );
+    vmathT3SetCol2( &affine, &xyz2 );
+    vmathV4GetXYZ( &xyz3, &mat->col3 );
+    vmathT3SetCol3( &affine, &xyz3 );
+    vmathT3OrthoInverse( &inverted, &affine ); /* the inversion itself is delegated to the 3x4 ortho routine */
+    vmathM4MakeFromT3( result, &inverted ); /* widen back to 4x4 */
+}
+
+static inline float vmathM4Determinant( const VmathMatrix4 *mat )
+{ /* 4x4 determinant: four cofactor-style terms dx..dw, each weighted by the x element of one column, then summed. */
+    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat->col0.x; /* mA..mD = col0, mE..mH = col1, mI..mL = col2, mM..mP = col3 */
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x;
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x;
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x;
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w;
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) ); /* 2x2 terms built from the y/z/w elements only */
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ); /* term paired with mA below */
+    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ); /* term paired with mE below */
+    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ); /* term paired with mI below */
+    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ); /* term paired with mM below */
+    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
+}
+
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{ /* Column-by-column sum: each column of *result is vmathV4Add of the matching columns of *mat0 and *mat1. */
+    vmathV4Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV4Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV4Add( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathV4Add( &result->col3, &mat0->col3, &mat1->col3 );
+}
+
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{ /* Column-by-column difference: each column of *result is vmathV4Sub of the matching columns, *mat0 first. */
+    vmathV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
+}
+
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{ /* Apply vmathV4Neg to each column of *mat, storing into the matching column of *result. */
+    vmathV4Neg( &result->col0, &mat->col0 );
+    vmathV4Neg( &result->col1, &mat->col1 );
+    vmathV4Neg( &result->col2, &mat->col2 );
+    vmathV4Neg( &result->col3, &mat->col3 );
+}
+
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{ /* Apply vmathV4AbsPerElem to each column of *mat, storing into the matching column of *result. */
+    vmathV4AbsPerElem( &result->col0, &mat->col0 );
+    vmathV4AbsPerElem( &result->col1, &mat->col1 );
+    vmathV4AbsPerElem( &result->col2, &mat->col2 );
+    vmathV4AbsPerElem( &result->col3, &mat->col3 );
+}
+
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
+{ /* Apply vmathV4ScalarMul with the same scalar to each column of *mat. */
+    vmathV4ScalarMul( &result->col0, &mat->col0, scalar );
+    vmathV4ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathV4ScalarMul( &result->col2, &mat->col2, scalar );
+    vmathV4ScalarMul( &result->col3, &mat->col3, scalar );
+}
+
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
+{
+    /* Matrix * column vector. All four sums are formed before *result is written. */
+    const float outX = ( ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) ) + ( mat->col3.x * vec->w ) );
+    const float outY = ( ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) ) + ( mat->col3.y * vec->w ) );
+    const float outZ = ( ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) ) + ( mat->col3.z * vec->w ) );
+    const float outW = ( ( ( ( mat->col0.w * vec->x ) + ( mat->col1.w * vec->y ) ) + ( mat->col2.w * vec->z ) ) + ( mat->col3.w * vec->w ) );
+    vmathV4MakeFromElems( result, outX, outY, outZ, outW );
+}
+
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
+{ /* Multiply by a 3-element vector using only col0..col2 of *mat; col3 is never read. Fields of *result are stored one at a time, interleaved with the reads. */
+    result->x = ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) );
+    result->y = ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) );
+    result->z = ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) );
+    result->w = ( ( ( mat->col0.w * vec->x ) + ( mat->col1.w * vec->y ) ) + ( mat->col2.w * vec->z ) );
+}
+
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
+{ /* Multiply by a 3-element point: same sums as for a 3-vector, plus col3 of *mat added unscaled. Fields of *result are stored one at a time, interleaved with the reads. */
+    result->x = ( ( ( ( mat->col0.x * pnt->x ) + ( mat->col1.x * pnt->y ) ) + ( mat->col2.x * pnt->z ) ) + mat->col3.x );
+    result->y = ( ( ( ( mat->col0.y * pnt->x ) + ( mat->col1.y * pnt->y ) ) + ( mat->col2.y * pnt->z ) ) + mat->col3.y );
+    result->z = ( ( ( ( mat->col0.z * pnt->x ) + ( mat->col1.z * pnt->y ) ) + ( mat->col2.z * pnt->z ) ) + mat->col3.z );
+    result->w = ( ( ( ( mat->col0.w * pnt->x ) + ( mat->col1.w * pnt->y ) ) + ( mat->col2.w * pnt->z ) ) + mat->col3.w );
+}
+
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    VmathMatrix4 product; /* assembled separately and copied out last, after every read of the operands */
+    vmathM4MulV4( &product.col0, mat0, &mat1->col0 ); /* column k of the product is mat0 * column k of mat1 */
+    vmathM4MulV4( &product.col1, mat0, &mat1->col1 );
+    vmathM4MulV4( &product.col2, mat0, &mat1->col2 );
+    vmathM4MulV4( &product.col3, mat0, &mat1->col3 );
+    vmathM4Copy( result, &product );
+}
+
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
+{
+    VmathMatrix4 product; /* assembled separately and copied out last */
+    VmathPoint3 lastColAsPoint; /* col3 of the transform, converted to a point */
+    vmathM4MulV3( &product.col0, mat, &tfrm1->col0 ); /* first three columns go through the vector multiply */
+    vmathM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathP3MakeFromV3( &lastColAsPoint, &tfrm1->col3 );
+    vmathM4MulP3( &product.col3, mat, &lastColAsPoint ); /* last column goes through the point multiply */
+    vmathM4Copy( result, &product );
+}
+
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{ /* Apply vmathV4MulPerElem to matching columns of *mat0 and *mat1 (not a matrix product). */
+    vmathV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
+}
+
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
+{ /* Set the columns of *result via vmathV4MakeXAxis, MakeYAxis, MakeZAxis and MakeWAxis respectively. */
+    vmathV4MakeXAxis( &result->col0 );
+    vmathV4MakeYAxis( &result->col1 );
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
+{ /* Store the columns of *mat3 into col0..col2 of *result through vmathV4SetXYZ; col3 is not touched here. */
+    vmathV4SetXYZ( &result->col0, &mat3->col0 );
+    vmathV4SetXYZ( &result->col1, &mat3->col1 );
+    vmathV4SetXYZ( &result->col2, &mat3->col2 );
+}
+
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
+{ /* Extract col0..col2 of *mat into the columns of *result through vmathV4GetXYZ; col3 is not read. */
+    vmathV4GetXYZ( &result->col0, &mat->col0 );
+    vmathV4GetXYZ( &result->col1, &mat->col1 );
+    vmathV4GetXYZ( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{ /* Store *translateVec into col3 of *result through vmathV4SetXYZ; col0..col2 are not touched. */
+    vmathV4SetXYZ( &result->col3, translateVec );
+}
+
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
+{ /* Extract col3 of *mat into *result through vmathV4GetXYZ. */
+    vmathV4GetXYZ( result, &mat->col3 );
+}
+
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    /* Columns 1 and 2 are ( 0, cos, sin, 0 ) and ( 0, -sin, cos, 0 ); columns 0 and 3 come from the axis helpers. */
+    vmathV4MakeXAxis( &result->col0 );
+    vmathV4MakeFromElems( &result->col1, 0.0f, cosA, sinA, 0.0f );
+    vmathV4MakeFromElems( &result->col2, 0.0f, -sinA, cosA, 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    /* Columns 0 and 2 are ( cos, 0, -sin, 0 ) and ( sin, 0, cos, 0 ); columns 1 and 3 come from the axis helpers. */
+    vmathV4MakeFromElems( &result->col0, cosA, 0.0f, -sinA, 0.0f );
+    vmathV4MakeYAxis( &result->col1 );
+    vmathV4MakeFromElems( &result->col2, sinA, 0.0f, cosA, 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    /* Columns 0 and 1 are ( cos, sin, 0, 0 ) and ( -sin, cos, 0, 0 ); columns 2 and 3 come from the axis helpers. */
+    vmathV4MakeFromElems( &result->col0, cosA, sinA, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col1, -sinA, cosA, 0.0f, 0.0f );
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ->x );
+    const float cosX = cosf( radiansXYZ->x );
+    const float sinY = sinf( radiansXYZ->y );
+    const float cosY = cosf( radiansXYZ->y );
+    const float sinZ = sinf( radiansXYZ->z );
+    const float cosZ = cosf( radiansXYZ->z );
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    /* All six sines/cosines are taken before any column of *result is written; w of col0..col2 is 0.0f. */
+    vmathV4MakeFromElems( &result->col0, ( cosZ * cosY ), ( sinZ * cosY ), -sinY, 0.0f );
+    vmathV4MakeFromElems( &result->col1, ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ), 0.0f );
+    vmathV4MakeFromElems( &result->col2, ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ), 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const float ax = unitVec->x;
+    const float ay = unitVec->y;
+    const float az = unitVec->z;
+    const float axay = ( ax * ay );
+    const float ayaz = ( ay * az );
+    const float azax = ( az * ax );
+    const float oneMinusCos = ( 1.0f - cosA );
+    /* unitVec is used as given; nothing here normalizes it. w of col0..col2 is 0.0f. */
+    vmathV4MakeFromElems( &result->col0, ( ( ( ax * ax ) * oneMinusCos ) + cosA ), ( ( axay * oneMinusCos ) + ( az * sinA ) ), ( ( azax * oneMinusCos ) - ( ay * sinA ) ), 0.0f );
+    vmathV4MakeFromElems( &result->col1, ( ( axay * oneMinusCos ) - ( az * sinA ) ), ( ( ( ay * ay ) * oneMinusCos ) + cosA ), ( ( ayaz * oneMinusCos ) + ( ax * sinA ) ), 0.0f );
+    vmathV4MakeFromElems( &result->col2, ( ( azax * oneMinusCos ) + ( ay * sinA ) ), ( ( ayaz * oneMinusCos ) - ( ax * sinA ) ), ( ( ( az * az ) * oneMinusCos ) + cosA ), 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
+{
+    VmathTransform3 rotation; /* 3x4 transform produced from the quaternion */
+    vmathT3MakeRotationQ( &rotation, unitQuat );
+    vmathM4MakeFromT3( result, &rotation ); /* widen to 4x4 */
+}
+
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
+{ /* Diagonal scale: scaleVec->x, ->y, ->z on the diagonal of col0..col2, 0.0f elsewhere; col3 comes from vmathV4MakeWAxis. scaleVec is re-read after each column write. */
+    vmathV4MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z, 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
+{ /* Scale whole columns: col0, col1, col2 of *mat are multiplied by vmathV3GetX, GetY, GetZ of *scaleVec; col3 is copied unchanged. */
+    vmathV4ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
+    vmathV4ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
+    vmathV4ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
+    vmathV4Copy( &result->col3, &mat->col3 );
+}
+
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
+{
+    VmathVector4 scaleAndOne; /* scaleVec widened by vmathV4MakeFromV3Scalar with scalar 1.0f */
+    vmathV4MakeFromV3Scalar( &scaleAndOne, scaleVec, 1.0f );
+    vmathV4MulPerElem( &result->col0, &mat->col0, &scaleAndOne ); /* every column, col3 included, is multiplied per element */
+    vmathV4MulPerElem( &result->col1, &mat->col1, &scaleAndOne );
+    vmathV4MulPerElem( &result->col2, &mat->col2, &scaleAndOne );
+    vmathV4MulPerElem( &result->col3, &mat->col3, &scaleAndOne );
+}
+
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{ /* col0..col2 come from the X/Y/Z axis helpers; col3 is *translateVec widened with scalar 1.0f. */
+    vmathV4MakeXAxis( &result->col0 );
+    vmathV4MakeYAxis( &result->col1 );
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
+}
+
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
+{
+    VmathMatrix4 eyeFrame; /* columns: X, Y, Z axes and the eye position */
+    VmathVector3 axisX, axisY, axisZ, eyeOffset, rawX;
+    VmathVector4 colX, colY, colZ, colEye;
+    vmathV3Normalize( &axisY, upVec ); /* provisional Y: the normalized up hint */
+    vmathP3Sub( &eyeOffset, eyePos, lookAtPos ); /* vmathP3Sub of eyePos and lookAtPos, in that argument order */
+    vmathV3Normalize( &axisZ, &eyeOffset );
+    vmathV3Cross( &rawX, &axisY, &axisZ );
+    vmathV3Normalize( &axisX, &rawX );
+    vmathV3Cross( &axisY, &axisZ, &axisX ); /* final Y overwrites the provisional one */
+    vmathV4MakeFromV3( &colX, &axisX );
+    vmathV4MakeFromV3( &colY, &axisY );
+    vmathV4MakeFromV3( &colZ, &axisZ );
+    vmathV4MakeFromP3( &colEye, eyePos );
+    vmathM4MakeFromCols( &eyeFrame, &colX, &colY, &colZ, &colEye );
+    vmathM4OrthoInverse( result, &eyeFrame ); /* the result is the ortho-inverse of the eye frame */
+}
+
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
+{
+    /* yScale is tanf( _VECTORMATH_PI_OVER_2 - fovyRadians / 2 ); zNear == zFar and aspect == 0 are not guarded. */
+    const float yScale = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
+    const float invDepth = ( 1.0f / ( zNear - zFar ) );
+    vmathV4MakeFromElems( &result->col0, ( yScale / aspect ), 0.0f, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col1, 0.0f, yScale, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, ( ( zNear + zFar ) * invDepth ), -1.0f );
+    vmathV4MakeFromElems( &result->col3, 0.0f, 0.0f, ( ( ( zNear * zFar ) * invDepth ) * 2.0f ), 0.0f );
+}
+
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar ) /* Fills *result with a perspective projection described by the six frustum bounds. */
+{
+    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
+    sum_rl = ( right + left );
+    sum_tb = ( top + bottom );
+    sum_nf = ( zNear + zFar );
+    inv_rl = ( 1.0f / ( right - left ) ); /* the three reciprocals are not guarded against equal bounds */
+    inv_tb = ( 1.0f / ( top - bottom ) );
+    inv_nf = ( 1.0f / ( zNear - zFar ) );
+    n2 = ( zNear + zNear ); /* 2 * zNear */
+    vmathV4MakeFromElems( &result->col0, ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col1, 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col2, ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f ); /* off-centre terms plus -1 in the last element */
+    vmathV4MakeFromElems( &result->col3, 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f ); /* 2*zNear*zFar/(zNear-zFar) */
+}
+
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar ) /* Fills *result with an orthographic projection described by the six box bounds. */
+{
+    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
+    sum_rl = ( right + left );
+    sum_tb = ( top + bottom );
+    sum_nf = ( zNear + zFar );
+    inv_rl = ( 1.0f / ( right - left ) ); /* the three reciprocals are not guarded against equal bounds */
+    inv_tb = ( 1.0f / ( top - bottom ) );
+    inv_nf = ( 1.0f / ( zNear - zFar ) );
+    vmathV4MakeFromElems( &result->col0, ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f ); /* 2/(right-left) */
+    vmathV4MakeFromElems( &result->col1, 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f ); /* 2/(top-bottom) */
+    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f ); /* 2/(zNear-zFar) */
+    vmathV4MakeFromElems( &result->col3, ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f ); /* offset column; last element is 1 */
+}
+
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 ) /* Column-wise select between mat0 and mat1, forwarding select1 to vmathV4Select. */
+{
+    vmathV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 ); /* the same select1 is used for all four columns */
+    vmathV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
+    vmathV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
+    vmathV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM4Print( const VmathMatrix4 *mat ) /* Debug helper: prints the matrix row by row via vmathV4Print. */
+{
+    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
+    vmathM4GetRow( &tmpV4_0, mat, 0 ); /* rows 0..3 are extracted and printed in order */
+    vmathV4Print( &tmpV4_0 );
+    vmathM4GetRow( &tmpV4_1, mat, 1 );
+    vmathV4Print( &tmpV4_1 );
+    vmathM4GetRow( &tmpV4_2, mat, 2 );
+    vmathV4Print( &tmpV4_2 );
+    vmathM4GetRow( &tmpV4_3, mat, 3 );
+    vmathV4Print( &tmpV4_3 );
+}
+
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name ) /* Debug helper: prints a "name:" header line, then the matrix. */
+{
+    printf("%s:\n", name); /* name must be a valid C string; it is passed straight to %s */
+    vmathM4Print( mat );
+}
+
+#endif
+
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm ) /* Copies all four columns of *tfrm into *result. */
+{
+    vmathV3Copy( &result->col0, &tfrm->col0 );
+    vmathV3Copy( &result->col1, &tfrm->col1 );
+    vmathV3Copy( &result->col2, &tfrm->col2 );
+    vmathV3Copy( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar ) /* Builds every column of *result from the same scalar via vmathV3MakeFromScalar. */
+{
+    vmathV3MakeFromScalar( &result->col0, scalar );
+    vmathV3MakeFromScalar( &result->col1, scalar );
+    vmathV3MakeFromScalar( &result->col2, scalar );
+    vmathV3MakeFromScalar( &result->col3, scalar ); /* col3 gets the scalar too, not zero */
+}
+
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 ) /* Assembles *result from four explicitly supplied columns. */
+{
+    vmathV3Copy( &result->col0, _col0 );
+    vmathV3Copy( &result->col1, _col1 );
+    vmathV3Copy( &result->col2, _col2 );
+    vmathV3Copy( &result->col3, _col3 );
+}
+
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec ) /* Builds *result from a 3x3 matrix (columns 0..2) and a translation (col3). */
+{
+    vmathT3SetUpper3x3( result, tfrm ); /* writes col0..col2 */
+    vmathT3SetTranslation( result, translateVec ); /* writes col3 */
+}
+
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec ) /* Builds *result from a quaternion (converted to a 3x3 matrix) and a translation. */
+{
+    VmathMatrix3 tmpM3_0;
+    vmathM3MakeFromQ( &tmpM3_0, unitQuat ); /* parameter name suggests a unit quaternion is expected; it is not normalized here */
+    vmathT3SetUpper3x3( result, &tmpM3_0 );
+    vmathT3SetTranslation( result, translateVec );
+}
+
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 ) /* Overwrites column 0 of *result; other columns are untouched. */
+{
+    vmathV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 ) /* Overwrites column 1 of *result; other columns are untouched. */
+{
+    vmathV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 ) /* Overwrites column 2 of *result; other columns are untouched. */
+{
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 ) /* Overwrites column 3 of *result; other columns are untouched. */
+{
+    vmathV3Copy( &result->col3, _col3 );
+}
+
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec ) /* Overwrites the column selected by index col with *vec. */
+{
+    vmathV3Copy( (&result->col0 + col), vec ); /* pointer arithmetic from col0: assumes col0..col3 are contiguous in the struct (definition not shown); col is not range-checked */
+}
+
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec ) /* Overwrites one row: element `row` of each of the four columns is taken from vec elements 0..3. */
+{
+    vmathV3SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) ); /* row is forwarded unchecked to vmathV3SetElem */
+    vmathV3SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
+    vmathV3SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
+    vmathV3SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
+}
+
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val ) /* Sets the single element at (col, row) to val. */
+{
+    VmathVector3 tmpV3_0;
+    vmathT3GetCol( &tmpV3_0, result, col ); /* read-modify-write: fetch the whole column ... */
+    vmathV3SetElem( &tmpV3_0, row, val ); /* ... patch one element ... */
+    vmathT3SetCol( result, col, &tmpV3_0 ); /* ... and store the column back */
+}
+
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row ) /* Returns the single element at (col, row). */
+{
+    VmathVector3 tmpV3_0;
+    vmathT3GetCol( &tmpV3_0, tfrm, col ); /* copies the selected column, then picks one element from the copy */
+    return vmathV3GetElem( &tmpV3_0, row );
+}
+
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm ) /* Copies column 0 of *tfrm into *result. */
+{
+    vmathV3Copy( result, &tfrm->col0 );
+}
+
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm ) /* Copies column 1 of *tfrm into *result. */
+{
+    vmathV3Copy( result, &tfrm->col1 );
+}
+
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm ) /* Copies column 2 of *tfrm into *result. */
+{
+    vmathV3Copy( result, &tfrm->col2 );
+}
+
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm ) /* Copies column 3 of *tfrm into *result. */
+{
+    vmathV3Copy( result, &tfrm->col3 );
+}
+
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col ) /* Copies the column selected by index col into *result. */
+{
+    vmathV3Copy( result, (&tfrm->col0 + col) ); /* pointer arithmetic from col0: assumes col0..col3 are contiguous in the struct (definition not shown); col is not range-checked */
+}
+
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row ) /* Gathers element `row` of col0..col3 into a 4-element vector. */
+{
+    vmathV4MakeFromElems( result, vmathV3GetElem( &tfrm->col0, row ), vmathV3GetElem( &tfrm->col1, row ), vmathV3GetElem( &tfrm->col2, row ), vmathV3GetElem( &tfrm->col3, row ) );
+}
+
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm ) /* General inverse of a 3x4 transform: inverts columns 0..2 as a 3x3 matrix and derives the matching col3. */
+{
+    VmathVector3 tmp0, tmp1, tmp2, inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
+    float detinv;
+    vmathV3Cross( &tmp0, &tfrm->col1, &tfrm->col2 ); /* cross products of column pairs (cofactor vectors) */
+    vmathV3Cross( &tmp1, &tfrm->col2, &tfrm->col0 );
+    vmathV3Cross( &tmp2, &tfrm->col0, &tfrm->col1 );
+    detinv = ( 1.0f / vmathV3Dot( &tfrm->col2, &tmp2 ) ); /* 1/det, det = col2 . (col0 x col1); not guarded against a zero determinant */
+    vmathV3MakeFromElems( &inv0, ( tmp0.x * detinv ), ( tmp1.x * detinv ), ( tmp2.x * detinv ) ); /* inverse columns gather the x / y / z parts of tmp0..tmp2 */
+    vmathV3MakeFromElems( &inv1, ( tmp0.y * detinv ), ( tmp1.y * detinv ), ( tmp2.y * detinv ) );
+    vmathV3MakeFromElems( &inv2, ( tmp0.z * detinv ), ( tmp1.z * detinv ), ( tmp2.z * detinv ) );
+    vmathV3Copy( &result->col0, &inv0 );
+    vmathV3Copy( &result->col1, &inv1 );
+    vmathV3Copy( &result->col2, &inv2 );
+    vmathV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x ); /* tfrm->col3 is read here, before result->col3 is written below */
+    vmathV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
+    vmathV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
+    vmathV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
+    vmathV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
+    vmathV3Neg( &tmpV3_5, &tmpV3_4 ); /* new col3 = -( inv0*t.x + inv1*t.y + inv2*t.z ), t = tfrm->col3 */
+    vmathV3Copy( &result->col3, &tmpV3_5 );
+}
+
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm ) /* Inverse that transposes columns 0..2 instead of inverting them; only a true inverse when that 3x3 part is orthonormal (not checked). */
+{
+    VmathVector3 inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
+    vmathV3MakeFromElems( &inv0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x ); /* inv0..inv2 are the rows of tfrm's 3x3 part, i.e. its transpose */
+    vmathV3MakeFromElems( &inv1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
+    vmathV3MakeFromElems( &inv2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
+    vmathV3Copy( &result->col0, &inv0 );
+    vmathV3Copy( &result->col1, &inv1 );
+    vmathV3Copy( &result->col2, &inv2 );
+    vmathV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x ); /* tfrm->col3 is read here, before result->col3 is written below */
+    vmathV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
+    vmathV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
+    vmathV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
+    vmathV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
+    vmathV3Neg( &tmpV3_5, &tmpV3_4 ); /* new col3 = -( inv0*t.x + inv1*t.y + inv2*t.z ), t = tfrm->col3 */
+    vmathV3Copy( &result->col3, &tmpV3_5 );
+}
+
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm ) /* Applies vmathV3AbsPerElem to each of the four columns. */
+{
+    vmathV3AbsPerElem( &result->col0, &tfrm->col0 );
+    vmathV3AbsPerElem( &result->col1, &tfrm->col1 );
+    vmathV3AbsPerElem( &result->col2, &tfrm->col2 );
+    vmathV3AbsPerElem( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec ) /* Multiplies *vec by columns 0..2 of *tfrm; col3 is not applied. */
+{
+    float tmpX, tmpY, tmpZ; /* all three sums are computed before *result is written, so vec is fully read first */
+    tmpX = ( ( ( tfrm->col0.x * vec->x ) + ( tfrm->col1.x * vec->y ) ) + ( tfrm->col2.x * vec->z ) );
+    tmpY = ( ( ( tfrm->col0.y * vec->x ) + ( tfrm->col1.y * vec->y ) ) + ( tfrm->col2.y * vec->z ) );
+    tmpZ = ( ( ( tfrm->col0.z * vec->x ) + ( tfrm->col1.z * vec->y ) ) + ( tfrm->col2.z * vec->z ) );
+    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt ) /* Transforms a point: columns 0..2 times *pnt, plus col3. */
+{
+    float tmpX, tmpY, tmpZ; /* all three sums are computed before *result is written, so pnt is fully read first */
+    tmpX = ( ( ( ( tfrm->col0.x * pnt->x ) + ( tfrm->col1.x * pnt->y ) ) + ( tfrm->col2.x * pnt->z ) ) + tfrm->col3.x ); /* the trailing col3 term is the only difference from vmathT3MulV3 */
+    tmpY = ( ( ( ( tfrm->col0.y * pnt->x ) + ( tfrm->col1.y * pnt->y ) ) + ( tfrm->col2.y * pnt->z ) ) + tfrm->col3.y );
+    tmpZ = ( ( ( ( tfrm->col0.z * pnt->x ) + ( tfrm->col1.z * pnt->y ) ) + ( tfrm->col2.z * pnt->z ) ) + tfrm->col3.z );
+    vmathP3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 ) /* Composes two transforms: tfrm0 applied to each column of tfrm1. */
+{
+    VmathTransform3 tmpResult; /* product is built here and copied out last, so inputs are read before *result changes */
+    VmathPoint3 tmpP3_0, tmpP3_1;
+    vmathT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 ); /* columns 0..2 go through the vector multiply (no col3 term) */
+    vmathT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
+    vmathT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
+    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 ); /* col3 goes through the point multiply, which adds tfrm0->col3 */
+    vmathT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
+    vmathV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
+    vmathT3Copy( result, &tmpResult );
+}
+
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 ) /* Column-by-column vmathV3MulPerElem of the two transforms; not a matrix product. */
+{
+    vmathV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
+    vmathV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
+    vmathV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
+    vmathV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
+}
+
+static inline void vmathT3MakeIdentity( VmathTransform3 *result ) /* Fills *result with the identity transform. */
+{
+    vmathV3MakeXAxis( &result->col0 ); /* columns 0..2 come from the X/Y/Z axis helpers (defined elsewhere) */
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( &result->col3, 0.0f ); /* col3 built from the scalar 0 */
+}
+
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm ) /* Overwrites columns 0..2 with the 3x3 matrix *tfrm; col3 is left unchanged. */
+{
+    vmathV3Copy( &result->col0, &tfrm->col0 );
+    vmathV3Copy( &result->col1, &tfrm->col1 );
+    vmathV3Copy( &result->col2, &tfrm->col2 );
+}
+
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm ) /* Extracts columns 0..2 of *tfrm as a 3x3 matrix. */
+{
+    vmathM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
+}
+
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec ) /* Overwrites col3 with translateVec; columns 0..2 are left unchanged. */
+{
+    vmathV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm ) /* Copies col3 of *tfrm into *result. */
+{
+    vmathV3Copy( result, &tfrm->col3 );
+}
+
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians ) /* Fills *result with a rotation about the X axis by `radians`; col3 is built from 0. */
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeFromElems( &result->col1, 0.0f, c, s ); /* col1 = (0, cos, sin) */
+    vmathV3MakeFromElems( &result->col2, 0.0f, -s, c ); /* col2 = (0, -sin, cos) */
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians ) /* Fills *result with a rotation about the Y axis by `radians`; col3 is built from 0. */
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    vmathV3MakeFromElems( &result->col0, c, 0.0f, -s ); /* col0 = (cos, 0, -sin) */
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeFromElems( &result->col2, s, 0.0f, c ); /* col2 = (sin, 0, cos) */
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians ) /* Fills *result with a rotation about the Z axis by `radians`; col3 is built from 0. */
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    vmathV3MakeFromElems( &result->col0, c, s, 0.0f ); /* col0 = (cos, sin, 0) */
+    vmathV3MakeFromElems( &result->col1, -s, c, 0.0f ); /* col1 = (-sin, cos, 0) */
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ ) /* Fills *result with a rotation built from three angles (x, y, z of radiansXYZ, in radians); col3 is built from 0. */
+{
+    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sX = sinf( radiansXYZ->x ); /* sine/cosine of each of the three angles */
+    cX = cosf( radiansXYZ->x );
+    sY = sinf( radiansXYZ->y );
+    cY = cosf( radiansXYZ->y );
+    sZ = sinf( radiansXYZ->z );
+    cZ = cosf( radiansXYZ->z );
+    tmp0 = ( cZ * sY ); /* products shared by col1 and col2 */
+    tmp1 = ( sZ * sY );
+    vmathV3MakeFromElems( &result->col0, ( cZ * cY ), ( sZ * cY ), -sY ); /* columns match the product Rz * Ry * Rx of the single-axis rotations */
+    vmathV3MakeFromElems( &result->col1, ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) );
+    vmathV3MakeFromElems( &result->col2, ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec ) /* Fills *result with the axis-angle rotation from vmathM3MakeRotationAxis and a col3 built from 0. */
+{
+    VmathMatrix3 tmpM3_0;
+    VmathVector3 tmpV3_0;
+    vmathM3MakeRotationAxis( &tmpM3_0, radians, unitVec ); /* parameter name suggests a unit-length axis is expected; it is not normalized here */
+    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
+    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
+}
+
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat ) /* Fills *result with the 3x3 matrix from vmathM3MakeFromQ and a col3 built from 0. */
+{
+    VmathMatrix3 tmpM3_0;
+    VmathVector3 tmpV3_0;
+    vmathM3MakeFromQ( &tmpM3_0, unitQuat ); /* parameter name suggests a unit quaternion is expected; it is not normalized here */
+    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
+    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
+}
+
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec ) /* Fills *result with a scale transform: scaleVec on the diagonal of columns 0..2. */
+{
+    vmathV3MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f );
+    vmathV3MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f );
+    vmathV3MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z );
+    vmathV3MakeFromScalar( &result->col3, 0.0f ); /* col3 built from the scalar 0 */
+}
+
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec ) /* Scales column i of *tfrm by component i of scaleVec; col3 is copied unchanged. */
+{
+    vmathV3ScalarMul( &result->col0, &tfrm->col0, vmathV3GetX( scaleVec ) );
+    vmathV3ScalarMul( &result->col1, &tfrm->col1, vmathV3GetY( scaleVec ) );
+    vmathV3ScalarMul( &result->col2, &tfrm->col2, vmathV3GetZ( scaleVec ) );
+    vmathV3Copy( &result->col3, &tfrm->col3 ); /* unlike vmathT3PrependScale, col3 is not scaled */
+}
+
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm ) /* Multiplies every column of *tfrm per element by scaleVec, col3 included. */
+{
+    vmathV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
+    vmathV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
+    vmathV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
+    vmathV3MulPerElem( &result->col3, &tfrm->col3, scaleVec ); /* col3 is scaled as well */
+}
+
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec ) /* Fills *result as a pure translation: axis columns plus translateVec in col3. */
+{
+    vmathV3MakeXAxis( &result->col0 ); /* columns 0..2 come from the X/Y/Z axis helpers (defined elsewhere) */
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 ) /* Column-wise select between tfrm0 and tfrm1, forwarding select1 to vmathV3Select. */
+{
+    vmathV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 ); /* the same select1 is used for all four columns */
+    vmathV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
+    vmathV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
+    vmathV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathT3Print( const VmathTransform3 *tfrm ) /* Debug helper: prints the transform as three 4-element rows via vmathV4Print. */
+{
+    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;
+    vmathT3GetRow( &tmpV4_0, tfrm, 0 ); /* only rows 0..2 are fetched and printed */
+    vmathV4Print( &tmpV4_0 );
+    vmathT3GetRow( &tmpV4_1, tfrm, 1 );
+    vmathV4Print( &tmpV4_1 );
+    vmathT3GetRow( &tmpV4_2, tfrm, 2 );
+    vmathV4Print( &tmpV4_2 );
+}
+
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name ) /* Debug helper: prints a "name:" header line, then the transform. */
+{
+    printf("%s:\n", name); /* name must be a valid C string; it is passed straight to %s */
+    vmathT3Print( tfrm );
+}
+
+#endif
+
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm ) /* Converts a 3x3 matrix to a quaternion, choosing the formulation from the sign of the trace and the diagonal ordering. Presumably expects a rotation matrix; not checked. */
+{
+    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    int negTrace, ZgtX, ZgtY, YgtX;
+    int largestXorY, largestYorZ, largestZorX;
+
+    xx = tfrm->col0.x; /* naming: first letter = component within the column, second letter = column (x=col0, y=col1, z=col2) */
+    yx = tfrm->col0.y;
+    zx = tfrm->col0.z;
+    xy = tfrm->col1.x;
+    yy = tfrm->col1.y;
+    zy = tfrm->col1.z;
+    xz = tfrm->col2.x;
+    yz = tfrm->col2.y;
+    zz = tfrm->col2.z;
+
+    trace = ( ( xx + yy ) + zz );
+
+    negTrace = ( trace < 0.0f ); /* all three flags below are zero unless the trace is negative */
+    ZgtX = zz > xx;
+    ZgtY = zz > yy;
+    YgtX = yy > xx;
+    largestXorY = ( !ZgtX || !ZgtY ) && negTrace; /* zz is not strictly greater than both xx and yy */
+    largestYorZ = ( YgtX || ZgtX ) && negTrace; /* yy or zz is strictly greater than xx */
+    largestZorX = ( ZgtY || !YgtX ) && negTrace; /* zz > yy, or yy is not strictly greater than xx */
+    
+    if ( largestXorY ) /* each set flag negates one diagonal and one off-diagonal term before the shared formula below */
+    {
+        zz = -zz;
+        xy = -xy;
+    }
+    if ( largestYorZ )
+    {
+        xx = -xx;
+        yz = -yz;
+    }
+    if ( largestZorX )
+    {
+        yy = -yy;
+        zx = -zx;
+    }
+
+    radicand = ( ( ( xx + yy ) + zz ) + 1.0f ); /* uses the possibly sign-flipped diagonal */
+    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) ); /* 0.5 / sqrt(radicand); not guarded against radicand <= 0 */
+
+    tmpx = ( ( zy - yz ) * scale );
+    tmpy = ( ( xz - zx ) * scale );
+    tmpz = ( ( yx - xy ) * scale );
+    tmpw = ( radicand * scale ); /* equals 0.5 * sqrt(radicand) */
+    qx = tmpx; /* with no flag set, the four values are already (x, y, z, w) */
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    if ( largestXorY ) /* reverse the order: (x, y, z, w) <- (w, z, y, x) */
+    {
+        qx = tmpw;
+        qy = tmpz;
+        qz = tmpy;
+        qw = tmpx;
+    }
+    if ( largestYorZ ) /* applied after the reversal above: swap x with y and z with w */
+    {
+        tmpx = qx;
+        tmpz = qz;
+        qx = qy;
+        qy = tmpx;
+        qz = qw;
+        qw = tmpz;
+    }
+
+    result->x = qx;
+    result->y = qy;
+    result->z = qz;
+    result->w = qw;
+}
+
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 ) /* Outer product of two 3-vectors: column j of *result is tfrm0 scaled by component j of tfrm1. */
+{
+    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
+    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
+    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
+}
+
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 ) /* Outer product of two 4-vectors: column j of *result is tfrm0 scaled by component j of tfrm1. */
+{
+    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
+    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
+    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
+    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
+}
+
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat ) /* Row-vector times matrix: component j of *result is the dot product of *vec with column j of *mat. */
+{
+    float tmpX, tmpY, tmpZ; /* all three sums are computed before *result is written, so vec is fully read first */
+    tmpX = ( ( ( vec->x * mat->col0.x ) + ( vec->y * mat->col0.y ) ) + ( vec->z * mat->col0.z ) );
+    tmpY = ( ( ( vec->x * mat->col1.x ) + ( vec->y * mat->col1.y ) ) + ( vec->z * mat->col1.z ) );
+    tmpZ = ( ( ( vec->x * mat->col2.x ) + ( vec->y * mat->col2.y ) ) + ( vec->z * mat->col2.z ) );
+    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec ) /* Builds the skew-symmetric matrix of *vec: multiplying it by a vector w gives vec x w. */
+{
+    vmathV3MakeFromElems( &result->col0, 0.0f, vec->z, -vec->y ); /* zero diagonal, sign-mirrored off-diagonal terms */
+    vmathV3MakeFromElems( &result->col1, -vec->z, 0.0f, vec->x );
+    vmathV3MakeFromElems( &result->col2, vec->y, -vec->x, 0.0f );
+}
+
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat ) /* Column j of *result is the cross product of *vec with column j of *mat. */
+{
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2; /* all three products are computed before *result is written */
+    vmathV3Cross( &tmpV3_0, vec, &mat->col0 );
+    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
+    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
+    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos_v.h
index 6d1c3cd7c..270ffcb50 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos_v.h
@@ -1,1006 +1,1006 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_V_C_H
-#define _VECTORMATH_MAT_AOS_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
-{
-    vmathM3SetCol0(result, &_col0);
-}
-
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
-{
-    vmathM3SetCol1(result, &_col1);
-}
-
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
-{
-    vmathM3SetCol2(result, &_col2);
-}
-
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
-{
-    vmathM3SetCol(result, col, &vec);
-}
-
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
-{
-    vmathM3SetRow(result, row, &vec);
-}
-
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
-{
-    vmathM3SetElem(result, col, row, val);
-}
-
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
-{
-    return vmathM3GetElem(&mat, col, row);
-}
-
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
-{
-    VmathVector3 result;
-    vmathM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
-{
-    VmathVector3 result;
-    vmathM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM3Determinant_V( VmathMatrix3 mat )
-{
-    return vmathM3Determinant(&mat);
-}
-
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeIdentity_V( )
-{
-    VmathMatrix3 result;
-    vmathM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
-{
-    VmathMatrix3 result;
-    vmathM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print_V( VmathMatrix3 mat )
-{
-    vmathM3Print(&mat);
-}
-
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
-{
-    vmathM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
-{
-    vmathM4SetCol0(result, &_col0);
-}
-
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
-{
-    vmathM4SetCol1(result, &_col1);
-}
-
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
-{
-    vmathM4SetCol2(result, &_col2);
-}
-
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
-{
-    vmathM4SetCol3(result, &_col3);
-}
-
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
-{
-    vmathM4SetCol(result, col, &vec);
-}
-
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
-{
-    vmathM4SetRow(result, row, &vec);
-}
-
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
-{
-    vmathM4SetElem(result, col, row, val);
-}
-
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
-{
-    return vmathM4GetElem(&mat, col, row);
-}
-
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
-{
-    VmathVector4 result;
-    vmathM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
-{
-    VmathVector4 result;
-    vmathM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM4Determinant_V( VmathMatrix4 mat )
-{
-    return vmathM4Determinant(&mat);
-}
-
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeIdentity_V( )
-{
-    VmathMatrix4 result;
-    vmathM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
-{
-    vmathM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
-{
-    VmathMatrix3 result;
-    vmathM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
-{
-    vmathM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
-{
-    VmathVector3 result;
-    vmathM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
-{
-    VmathMatrix4 result;
-    vmathM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print_V( VmathMatrix4 mat )
-{
-    vmathM4Print(&mat);
-}
-
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
-{
-    vmathM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
-{
-    vmathT3SetCol0(result, &_col0);
-}
-
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
-{
-    vmathT3SetCol1(result, &_col1);
-}
-
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
-{
-    vmathT3SetCol2(result, &_col2);
-}
-
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
-{
-    vmathT3SetCol3(result, &_col3);
-}
-
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
-{
-    vmathT3SetCol(result, col, &vec);
-}
-
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
-{
-    vmathT3SetRow(result, row, &vec);
-}
-
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
-{
-    vmathT3SetElem(result, col, row, val);
-}
-
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
-{
-    return vmathT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
-{
-    VmathVector3 result;
-    vmathT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
-{
-    VmathVector4 result;
-    vmathT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeIdentity_V( )
-{
-    VmathTransform3 result;
-    vmathT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
-{
-    vmathT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
-{
-    VmathMatrix3 result;
-    vmathT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
-{
-    vmathT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
-{
-    VmathTransform3 result;
-    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print_V( VmathTransform3 tfrm )
-{
-    vmathT3Print(&tfrm);
-}
-
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
-{
-    vmathT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
-{
-    VmathQuat result;
-    vmathQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
-{
-    VmathMatrix3 result;
-    vmathV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_V_C_H
+#define _VECTORMATH_MAT_AOS_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
+{
+    vmathM3SetCol0(result, &_col0);
+}
+
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
+{
+    vmathM3SetCol1(result, &_col1);
+}
+
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
+{
+    vmathM3SetCol2(result, &_col2);
+}
+
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
+{
+    vmathM3SetCol(result, col, &vec);
+}
+
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
+{
+    vmathM3SetRow(result, row, &vec);
+}
+
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
+{
+    vmathM3SetElem(result, col, row, val);
+}
+
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
+{
+    return vmathM3GetElem(&mat, col, row);
+}
+
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
+{
+    VmathVector3 result;
+    vmathM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
+{
+    VmathVector3 result;
+    vmathM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM3Determinant_V( VmathMatrix3 mat )
+{
+    return vmathM3Determinant(&mat);
+}
+
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeIdentity_V( )
+{
+    VmathMatrix3 result;
+    vmathM3MakeIdentity(&result);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
+{
+    VmathMatrix3 result;
+    vmathM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM3Print_V( VmathMatrix3 mat )
+{
+    vmathM3Print(&mat);
+}
+
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
+{
+    vmathM3Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromT3(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
+{
+    vmathM4SetCol0(result, &_col0);
+}
+
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
+{
+    vmathM4SetCol1(result, &_col1);
+}
+
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
+{
+    vmathM4SetCol2(result, &_col2);
+}
+
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
+{
+    vmathM4SetCol3(result, &_col3);
+}
+
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
+{
+    vmathM4SetCol(result, col, &vec);
+}
+
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
+{
+    vmathM4SetRow(result, row, &vec);
+}
+
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
+{
+    vmathM4SetElem(result, col, row, val);
+}
+
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
+{
+    return vmathM4GetElem(&mat, col, row);
+}
+
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
+{
+    VmathVector4 result;
+    vmathM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
+{
+    VmathVector4 result;
+    vmathM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM4Determinant_V( VmathMatrix4 mat )
+{
+    return vmathM4Determinant(&mat);
+}
+
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathM4MulP3(&result, &mat, &pnt);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulT3(&result, &mat, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeIdentity_V( )
+{
+    VmathMatrix4 result;
+    vmathM4MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
+{
+    vmathM4SetUpper3x3(result, &mat3);
+}
+
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
+{
+    VmathMatrix3 result;
+    vmathM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
+{
+    vmathM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
+{
+    VmathVector3 result;
+    vmathM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
+{
+    VmathMatrix4 result;
+    vmathM4Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM4Print_V( VmathMatrix4 mat )
+{
+    vmathM4Print(&mat);
+}
+
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
+{
+    vmathM4Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
+{
+    vmathT3SetCol0(result, &_col0);
+}
+
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
+{
+    vmathT3SetCol1(result, &_col1);
+}
+
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
+{
+    vmathT3SetCol2(result, &_col2);
+}
+
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
+{
+    vmathT3SetCol3(result, &_col3);
+}
+
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
+{
+    vmathT3SetCol(result, col, &vec);
+}
+
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
+{
+    vmathT3SetRow(result, row, &vec);
+}
+
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
+{
+    vmathT3SetElem(result, col, row, val);
+}
+
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
+{
+    return vmathT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol0(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol1(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol2(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
+{
+    VmathVector3 result;
+    vmathT3GetCol(&result, &tfrm, col);
+    return result;
+}
+
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
+{
+    VmathVector4 result;
+    vmathT3GetRow(&result, &tfrm, row);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3Inverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3OrthoInverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3AbsPerElem(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathT3MulV3(&result, &tfrm, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathT3MulP3(&result, &tfrm, &pnt);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3Mul(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeIdentity_V( )
+{
+    VmathTransform3 result;
+    vmathT3MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
+{
+    vmathT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
+{
+    VmathMatrix3 result;
+    vmathT3GetUpper3x3(&result, &tfrm);
+    return result;
+}
+
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
+{
+    vmathT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetTranslation(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3AppendScale(&result, &tfrm, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3PrependScale(&result, &scaleVec, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
+{
+    VmathTransform3 result;
+    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathT3Print_V( VmathTransform3 tfrm )
+{
+    vmathT3Print(&tfrm);
+}
+
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
+{
+    vmathT3Prints(&tfrm, name);
+}
+
+#endif
+
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
+{
+    VmathQuat result;
+    vmathQMakeFromM3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
+{
+    VmathMatrix3 result;
+    vmathV3Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathV4Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathV3RowMul(&result, &vec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrix(&result, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrixMul(&result, &vec, &mat);
+    return result;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos.h
index 2b114e257..c886ab669 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos.h
@@ -1,368 +1,368 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_C_H
-#define _VECTORMATH_QUAT_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
-{
-    vmathQSetXYZ( result, xyz );
-    vmathQSetW( result, _w );
-}
-
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathQMakeIdentity( VmathQuat *result )
-{
-    vmathQMakeFromElems( result, 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    VmathQuat tmpQ_0, tmpQ_1;
-    vmathQSub( &tmpQ_0, quat1, quat0 );
-    vmathQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
-{
-    VmathQuat start, tmpQ_0, tmpQ_1;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = vmathQDot( unitQuat0, unitQuat1 );
-    if ( cosAngle < 0.0f ) {
-        cosAngle = -cosAngle;
-        vmathQNeg( &start, unitQuat0 );
-    } else {
-        vmathQCopy( &start, unitQuat0 );
-    }
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    vmathQScalarMul( &tmpQ_0, &start, scale0 );
-    vmathQScalarMul( &tmpQ_1, unitQuat1, scale1 );
-    vmathQAdd( result, &tmpQ_0, &tmpQ_1 );
-}
-
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
-{
-    VmathQuat tmp0, tmp1;
-    vmathQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &tmp0, &tmp1 );
-}
-
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
-{
-    vmathV3MakeFromElems( result, quat->x, quat->y, quat->z );
-}
-
-static inline void vmathQSetX( VmathQuat *result, float _x )
-{
-    result->x = _x;
-}
-
-static inline float vmathQGetX( const VmathQuat *quat )
-{
-    return quat->x;
-}
-
-static inline void vmathQSetY( VmathQuat *result, float _y )
-{
-    result->y = _y;
-}
-
-static inline float vmathQGetY( const VmathQuat *quat )
-{
-    return quat->y;
-}
-
-static inline void vmathQSetZ( VmathQuat *result, float _z )
-{
-    result->z = _z;
-}
-
-static inline float vmathQGetZ( const VmathQuat *quat )
-{
-    return quat->z;
-}
-
-static inline void vmathQSetW( VmathQuat *result, float _w )
-{
-    result->w = _w;
-}
-
-static inline float vmathQGetW( const VmathQuat *quat )
-{
-    return quat->w;
-}
-
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline float vmathQGetElem( const VmathQuat *quat, int idx )
-{
-    return *(&quat->x + idx);
-}
-
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->x = ( quat0->x + quat1->x );
-    result->y = ( quat0->y + quat1->y );
-    result->z = ( quat0->z + quat1->z );
-    result->w = ( quat0->w + quat1->w );
-}
-
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->x = ( quat0->x - quat1->x );
-    result->y = ( quat0->y - quat1->y );
-    result->z = ( quat0->z - quat1->z );
-    result->w = ( quat0->w - quat1->w );
-}
-
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->x = ( quat->x * scalar );
-    result->y = ( quat->y * scalar );
-    result->z = ( quat->z * scalar );
-    result->w = ( quat->w * scalar );
-}
-
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->x = ( quat->x / scalar );
-    result->y = ( quat->y / scalar );
-    result->z = ( quat->z / scalar );
-    result->w = ( quat->w / scalar );
-}
-
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
-{
-    result->x = -quat->x;
-    result->y = -quat->y;
-    result->z = -quat->z;
-    result->w = -quat->w;
-}
-
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    float result;
-    result = ( quat0->x * quat1->x );
-    result = ( result + ( quat0->y * quat1->y ) );
-    result = ( result + ( quat0->z * quat1->z ) );
-    result = ( result + ( quat0->w * quat1->w ) );
-    return result;
-}
-
-static inline float vmathQNorm( const VmathQuat *quat )
-{
-    float result;
-    result = ( quat->x * quat->x );
-    result = ( result + ( quat->y * quat->y ) );
-    result = ( result + ( quat->z * quat->z ) );
-    result = ( result + ( quat->w * quat->w ) );
-    return result;
-}
-
-static inline float vmathQLength( const VmathQuat *quat )
-{
-    return sqrtf( vmathQNorm( quat ) );
-}
-
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
-{
-    float lenSqr, lenInv;
-    lenSqr = vmathQNorm( quat );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    result->x = ( quat->x * lenInv );
-    result->y = ( quat->y * lenInv );
-    result->z = ( quat->z * lenInv );
-    result->w = ( quat->w * lenInv );
-}
-
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    float cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + vmathV3Dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
-    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, recipCosHalfAngleX2 );
-    vmathQMakeFromV3Scalar( result, &tmpV3_1, ( cosHalfAngleX2 * 0.5f ) );
-}
-
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
-{
-    VmathVector3 tmpV3_0;
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    vmathV3ScalarMul( &tmpV3_0, unitVec, s );
-    vmathQMakeFromV3Scalar( result, &tmpV3_0, c );
-}
-
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    vmathQMakeFromElems( result, s, 0.0f, 0.0f, c );
-}
-
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    vmathQMakeFromElems( result, 0.0f, s, 0.0f, c );
-}
-
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    vmathQMakeFromElems( result, 0.0f, 0.0f, s, c );
-}
-
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( ( quat0->w * quat1->x ) + ( quat0->x * quat1->w ) ) + ( quat0->y * quat1->z ) ) - ( quat0->z * quat1->y ) );
-    tmpY = ( ( ( ( quat0->w * quat1->y ) + ( quat0->y * quat1->w ) ) + ( quat0->z * quat1->x ) ) - ( quat0->x * quat1->z ) );
-    tmpZ = ( ( ( ( quat0->w * quat1->z ) + ( quat0->z * quat1->w ) ) + ( quat0->x * quat1->y ) ) - ( quat0->y * quat1->x ) );
-    tmpW = ( ( ( ( quat0->w * quat1->w ) - ( quat0->x * quat1->x ) ) - ( quat0->y * quat1->y ) ) - ( quat0->z * quat1->z ) );
-    vmathQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( quat->w * vec->x ) + ( quat->y * vec->z ) ) - ( quat->z * vec->y ) );
-    tmpY = ( ( ( quat->w * vec->y ) + ( quat->z * vec->x ) ) - ( quat->x * vec->z ) );
-    tmpZ = ( ( ( quat->w * vec->z ) + ( quat->x * vec->y ) ) - ( quat->y * vec->x ) );
-    tmpW = ( ( ( quat->x * vec->x ) + ( quat->y * vec->y ) ) + ( quat->z * vec->z ) );
-    result->x = ( ( ( ( tmpW * quat->x ) + ( tmpX * quat->w ) ) - ( tmpY * quat->z ) ) + ( tmpZ * quat->y ) );
-    result->y = ( ( ( ( tmpW * quat->y ) + ( tmpY * quat->w ) ) - ( tmpZ * quat->x ) ) + ( tmpX * quat->z ) );
-    result->z = ( ( ( ( tmpW * quat->z ) + ( tmpZ * quat->w ) ) - ( tmpX * quat->y ) ) + ( tmpY * quat->x ) );
-}
-
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
-{
-    vmathQMakeFromElems( result, -quat->x, -quat->y, -quat->z, quat->w );
-}
-
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
-{
-    result->x = ( select1 )? quat1->x : quat0->x;
-    result->y = ( select1 )? quat1->y : quat0->y;
-    result->z = ( select1 )? quat1->z : quat0->z;
-    result->w = ( select1 )? quat1->w : quat0->w;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint( const VmathQuat *quat )
-{
-    printf( "( %f %f %f %f )\n", quat->x, quat->y, quat->z, quat->w );
-}
-
-static inline void vmathQPrints( const VmathQuat *quat, const char *name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, quat->x, quat->y, quat->z, quat->w );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_C_H
+#define _VECTORMATH_QUAT_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
+{
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
+{
+    vmathQSetXYZ( result, xyz );
+    vmathQSetW( result, _w );
+}
+
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+static inline void vmathQMakeIdentity( VmathQuat *result )
+{
+    vmathQMakeFromElems( result, 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    VmathQuat tmpQ_0, tmpQ_1;
+    vmathQSub( &tmpQ_0, quat1, quat0 );
+    vmathQScalarMul( &tmpQ_1, &tmpQ_0, t );
+    vmathQAdd( result, quat0, &tmpQ_1 );
+}
+
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
+{
+    VmathQuat start, tmpQ_0, tmpQ_1;
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = vmathQDot( unitQuat0, unitQuat1 );
+    if ( cosAngle < 0.0f ) {
+        cosAngle = -cosAngle;
+        vmathQNeg( &start, unitQuat0 );
+    } else {
+        vmathQCopy( &start, unitQuat0 );
+    }
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        angle = acosf( cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) );
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    vmathQScalarMul( &tmpQ_0, &start, scale0 );
+    vmathQScalarMul( &tmpQ_1, unitQuat1, scale1 );
+    vmathQAdd( result, &tmpQ_0, &tmpQ_1 );
+}
+
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
+{
+    VmathQuat tmp0, tmp1;
+    vmathQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
+    vmathQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
+    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &tmp0, &tmp1 );
+}
+
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
+{
+    vmathV3MakeFromElems( result, quat->x, quat->y, quat->z );
+}
+
+static inline void vmathQSetX( VmathQuat *result, float _x )
+{
+    result->x = _x;
+}
+
+static inline float vmathQGetX( const VmathQuat *quat )
+{
+    return quat->x;
+}
+
+static inline void vmathQSetY( VmathQuat *result, float _y )
+{
+    result->y = _y;
+}
+
+static inline float vmathQGetY( const VmathQuat *quat )
+{
+    return quat->y;
+}
+
+static inline void vmathQSetZ( VmathQuat *result, float _z )
+{
+    result->z = _z;
+}
+
+static inline float vmathQGetZ( const VmathQuat *quat )
+{
+    return quat->z;
+}
+
+static inline void vmathQSetW( VmathQuat *result, float _w )
+{
+    result->w = _w;
+}
+
+static inline float vmathQGetW( const VmathQuat *quat )
+{
+    return quat->w;
+}
+
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
+{
+    *(&result->x + idx) = value;
+}
+
+static inline float vmathQGetElem( const VmathQuat *quat, int idx )
+{
+    return *(&quat->x + idx);
+}
+
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    result->x = ( quat0->x + quat1->x );
+    result->y = ( quat0->y + quat1->y );
+    result->z = ( quat0->z + quat1->z );
+    result->w = ( quat0->w + quat1->w );
+}
+
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    result->x = ( quat0->x - quat1->x );
+    result->y = ( quat0->y - quat1->y );
+    result->z = ( quat0->z - quat1->z );
+    result->w = ( quat0->w - quat1->w );
+}
+
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
+{
+    result->x = ( quat->x * scalar );
+    result->y = ( quat->y * scalar );
+    result->z = ( quat->z * scalar );
+    result->w = ( quat->w * scalar );
+}
+
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
+{
+    result->x = ( quat->x / scalar );
+    result->y = ( quat->y / scalar );
+    result->z = ( quat->z / scalar );
+    result->w = ( quat->w / scalar );
+}
+
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
+{
+    result->x = -quat->x;
+    result->y = -quat->y;
+    result->z = -quat->z;
+    result->w = -quat->w;
+}
+
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    float result;
+    result = ( quat0->x * quat1->x );
+    result = ( result + ( quat0->y * quat1->y ) );
+    result = ( result + ( quat0->z * quat1->z ) );
+    result = ( result + ( quat0->w * quat1->w ) );
+    return result;
+}
+
+static inline float vmathQNorm( const VmathQuat *quat )
+{
+    float result;
+    result = ( quat->x * quat->x );
+    result = ( result + ( quat->y * quat->y ) );
+    result = ( result + ( quat->z * quat->z ) );
+    result = ( result + ( quat->w * quat->w ) );
+    return result;
+}
+
+static inline float vmathQLength( const VmathQuat *quat )
+{
+    return sqrtf( vmathQNorm( quat ) );
+}
+
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
+{
+    /* Scales *quat by 1/length; there is no guard, so a zero-length input divides by zero. */
+    const float normSq = vmathQNorm( quat );
+    const float invLen = 1.0f / sqrtf( normSq );
+    result->x = quat->x * invLen;
+    result->y = quat->y * invLen;
+    result->z = quat->z * invLen;
+    result->w = quat->w * invLen;
+}
+
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{
+    /* xyz = cross(v0, v1) / s and w = s / 2, with s = sqrtf(2 * (1 + dot(v0, v1))); s is 0 (division by zero) when dot == -1. */
+    VmathVector3 crossProd, scaledCross;
+    const float twoCosHalf = sqrtf( 2.0f * ( 1.0f + vmathV3Dot( unitVec0, unitVec1 ) ) );
+    const float invTwoCosHalf = 1.0f / twoCosHalf;
+    vmathV3Cross( &crossProd, unitVec0, unitVec1 );
+    vmathV3ScalarMul( &scaledCross, &crossProd, invTwoCosHalf );
+    vmathQMakeFromV3Scalar( result, &scaledCross, twoCosHalf * 0.5f );
+}
+
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
+{
+    /* Builds the quaternion from unitVec * sin(radians/2) (vector argument) and cos(radians/2) (scalar argument). */
+    VmathVector3 scaledAxis;
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    vmathV3ScalarMul( &scaledAxis, unitVec, sinHalf );
+    vmathQMakeFromV3Scalar( result, &scaledAxis, cosHalf );
+}
+
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
+{
+    /* Elements passed on: ( sin(radians/2), 0, 0, cos(radians/2) ). */
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    vmathQMakeFromElems( result, sinHalf, 0.0f, 0.0f, cosHalf );
+}
+
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
+{
+    /* Elements passed on: ( 0, sin(radians/2), 0, cos(radians/2) ). */
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    vmathQMakeFromElems( result, 0.0f, sinHalf, 0.0f, cosHalf );
+}
+
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
+{
+    /* Elements passed on: ( 0, 0, sin(radians/2), cos(radians/2) ). */
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    vmathQMakeFromElems( result, 0.0f, 0.0f, sinHalf, cosHalf );
+}
+
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    /* Quaternion product quat0 * quat1; all four terms are held in locals before result is handed to vmathQMakeFromElems. */
+    const float prodX = ( quat0->w * quat1->x ) + ( quat0->x * quat1->w ) + ( quat0->y * quat1->z ) - ( quat0->z * quat1->y );
+    const float prodY = ( quat0->w * quat1->y ) + ( quat0->y * quat1->w ) + ( quat0->z * quat1->x ) - ( quat0->x * quat1->z );
+    const float prodZ = ( quat0->w * quat1->z ) + ( quat0->z * quat1->w ) + ( quat0->x * quat1->y ) - ( quat0->y * quat1->x );
+    const float prodW = ( quat0->w * quat1->w ) - ( quat0->x * quat1->x ) - ( quat0->y * quat1->y ) - ( quat0->z * quat1->z );
+    vmathQMakeFromElems( result, prodX, prodY, prodZ, prodW );
+}
+
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
+{
+    /* Evaluates quat * vec * conj(quat) by hand; qvX..qvZ are the xyz of quat * vec and qvDot is its w with the sign flipped. */
+    const float qvX = ( quat->w * vec->x ) + ( quat->y * vec->z ) - ( quat->z * vec->y );
+    const float qvY = ( quat->w * vec->y ) + ( quat->z * vec->x ) - ( quat->x * vec->z );
+    const float qvZ = ( quat->w * vec->z ) + ( quat->x * vec->y ) - ( quat->y * vec->x );
+    const float qvDot = ( quat->x * vec->x ) + ( quat->y * vec->y ) + ( quat->z * vec->z );
+    result->x = ( qvDot * quat->x ) + ( qvX * quat->w ) - ( qvY * quat->z ) + ( qvZ * quat->y );
+    result->y = ( qvDot * quat->y ) + ( qvY * quat->w ) - ( qvZ * quat->x ) + ( qvX * quat->z );
+    result->z = ( qvDot * quat->z ) + ( qvZ * quat->w ) - ( qvX * quat->y ) + ( qvY * quat->x );
+}
+
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
+{
+    vmathQMakeFromElems( result, -quat->x, -quat->y, -quat->z, quat->w ); /* conjugate: x, y, z negated, w passed through */
+}
+
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
+{   /* Copies quat1's components when select1 is non-zero, otherwise quat0's. */
+    result->x = select1 ? quat1->x : quat0->x;
+    result->y = select1 ? quat1->y : quat0->y;
+    result->z = select1 ? quat1->z : quat0->z;
+    result->w = select1 ? quat1->w : quat0->w;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint( const VmathQuat *quat )
+{
+    printf( "( %f %f %f %f )\n", quat->x, quat->y, quat->z, quat->w ); /* debug aid: components in x y z w order on one line */
+}
+
+static inline void vmathQPrints( const VmathQuat *quat, const char *name )
+{
+    printf( "%s: ( %f %f %f %f )\n", name, quat->x, quat->y, quat->z, quat->w ); /* debug aid: same output as vmathQPrint, prefixed with name */
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos_v.h
index 8ccf0789b..b5a9e248d 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos_v.h
@@ -1,300 +1,300 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_V_C_H
-#define _VECTORMATH_QUAT_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
-{
-    VmathQuat result;
-    vmathQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
-{
-    VmathQuat result;
-    vmathQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeIdentity_V( )
-{
-    VmathQuat result;
-    vmathQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
-{
-    VmathQuat result;
-    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
-{
-    VmathQuat result;
-    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
-{
-    vmathQSetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
-{
-    VmathVector3 result;
-    vmathQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathQSetX_V( VmathQuat *result, float _x )
-{
-    vmathQSetX(result, _x);
-}
-
-static inline float vmathQGetX_V( VmathQuat quat )
-{
-    return vmathQGetX(&quat);
-}
-
-static inline void vmathQSetY_V( VmathQuat *result, float _y )
-{
-    vmathQSetY(result, _y);
-}
-
-static inline float vmathQGetY_V( VmathQuat quat )
-{
-    return vmathQGetY(&quat);
-}
-
-static inline void vmathQSetZ_V( VmathQuat *result, float _z )
-{
-    vmathQSetZ(result, _z);
-}
-
-static inline float vmathQGetZ_V( VmathQuat quat )
-{
-    return vmathQGetZ(&quat);
-}
-
-static inline void vmathQSetW_V( VmathQuat *result, float _w )
-{
-    vmathQSetW(result, _w);
-}
-
-static inline float vmathQGetW_V( VmathQuat quat )
-{
-    return vmathQGetW(&quat);
-}
-
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
-{
-    vmathQSetElem(result, idx, value);
-}
-
-static inline float vmathQGetElem_V( VmathQuat quat, int idx )
-{
-    return vmathQGetElem(&quat, idx);
-}
-
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQNeg_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNeg(&result, &quat);
-    return result;
-}
-
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    return vmathQDot(&quat0, &quat1);
-}
-
-static inline float vmathQNorm_V( VmathQuat quat )
-{
-    return vmathQNorm(&quat);
-}
-
-static inline float vmathQLength_V( VmathQuat quat )
-{
-    return vmathQLength(&quat);
-}
-
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathQuat result;
-    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathQuat result;
-    vmathQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationX_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationY_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationZ_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQConj_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
-{
-    VmathQuat result;
-    vmathQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint_V( VmathQuat quat )
-{
-    vmathQPrint(&quat);
-}
-
-static inline void vmathQPrints_V( VmathQuat quat, const char *name )
-{
-    vmathQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_V_C_H
+#define _VECTORMATH_QUAT_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
+{
+    VmathQuat quat; /* by-value form: filled in by vmathQMakeFromElems, then returned as a copy */
+    vmathQMakeFromElems( &quat, _x, _y, _z, _w );
+    return quat;
+}
+
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{
+    VmathQuat quat; /* by-value form: &xyz below is the address of this function's own copy */
+    vmathQMakeFromV3Scalar( &quat, &xyz, _w );
+    return quat;
+}
+
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
+{
+    VmathQuat quat; /* by-value form: forwards the local copy of vec to vmathQMakeFromV4 */
+    vmathQMakeFromV4( &quat, &vec );
+    return quat;
+}
+
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
+{
+    VmathQuat quat; /* by-value form: filled in by vmathQMakeFromScalar, then returned as a copy */
+    vmathQMakeFromScalar( &quat, scalar );
+    return quat;
+}
+
+static inline VmathQuat vmathQMakeIdentity_V( void )
+{   /* Returns by value the quaternion filled in by vmathQMakeIdentity. */
+    VmathQuat result; /* ( void ) makes this a real C prototype; an empty list leaves the parameters unspecified before C23 */
+    vmathQMakeIdentity(&result);
+    return result;
+}
+
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
+{
+    VmathQuat blended; /* by-value form: forwards t and the local copies of both inputs to vmathQLerp */
+    vmathQLerp( &blended, t, &quat0, &quat1 );
+    return blended;
+}
+
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
+{
+    VmathQuat blended; /* by-value form: forwards t and the local copies of both inputs to vmathQSlerp */
+    vmathQSlerp( &blended, t, &unitQuat0, &unitQuat1 );
+    return blended;
+}
+
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
+{
+    VmathQuat blended; /* by-value form: forwards t and the local copies of all four inputs to vmathQSquad */
+    vmathQSquad( &blended, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3 );
+    return blended;
+}
+
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
+{
+    vmathQSetXYZ(result, &vec); /* vec arrives by value; its local copy is passed on by address */
+}
+
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
+{
+    VmathVector3 xyz; /* by-value form: filled in by vmathQGetXYZ from the local copy of quat */
+    vmathQGetXYZ( &xyz, &quat );
+    return xyz;
+}
+
+static inline void vmathQSetX_V( VmathQuat *result, float _x )
+{
+    vmathQSetX(result, _x); /* arguments forwarded unchanged to vmathQSetX */
+}
+
+static inline float vmathQGetX_V( VmathQuat quat )
+{
+    return vmathQGetX(&quat); /* quat arrives by value; its local copy is passed on by address */
+}
+
+static inline void vmathQSetY_V( VmathQuat *result, float _y )
+{
+    vmathQSetY(result, _y); /* arguments forwarded unchanged to vmathQSetY */
+}
+
+static inline float vmathQGetY_V( VmathQuat quat )
+{
+    return vmathQGetY(&quat); /* quat arrives by value; its local copy is passed on by address */
+}
+
+static inline void vmathQSetZ_V( VmathQuat *result, float _z )
+{
+    vmathQSetZ(result, _z); /* arguments forwarded unchanged to vmathQSetZ */
+}
+
+static inline float vmathQGetZ_V( VmathQuat quat )
+{
+    return vmathQGetZ(&quat); /* quat arrives by value; its local copy is passed on by address */
+}
+
+static inline void vmathQSetW_V( VmathQuat *result, float _w )
+{
+    vmathQSetW(result, _w); /* arguments forwarded unchanged to vmathQSetW */
+}
+
+static inline float vmathQGetW_V( VmathQuat quat )
+{
+    return vmathQGetW(&quat); /* quat arrives by value; its local copy is passed on by address */
+}
+
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
+{
+    vmathQSetElem(result, idx, value); /* arguments forwarded unchanged; idx is not range-checked in this wrapper */
+}
+
+static inline float vmathQGetElem_V( VmathQuat quat, int idx )
+{
+    return vmathQGetElem(&quat, idx); /* reads from the local copy of quat; idx is not range-checked in this wrapper */
+}
+
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
+{
+    VmathQuat sum; /* by-value form: forwards the local copies of both operands to vmathQAdd */
+    vmathQAdd( &sum, &quat0, &quat1 );
+    return sum;
+}
+
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
+{
+    VmathQuat difference; /* by-value form: forwards the local copies of both operands to vmathQSub */
+    vmathQSub( &difference, &quat0, &quat1 );
+    return difference;
+}
+
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
+{
+    VmathQuat scaled; /* by-value form: forwards the local copy of quat and scalar to vmathQScalarMul */
+    vmathQScalarMul( &scaled, &quat, scalar );
+    return scaled;
+}
+
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
+{
+    VmathQuat scaled; /* by-value form: forwards the local copy of quat and scalar to vmathQScalarDiv */
+    vmathQScalarDiv( &scaled, &quat, scalar );
+    return scaled;
+}
+
+static inline VmathQuat vmathQNeg_V( VmathQuat quat )
+{
+    VmathQuat negated; /* by-value form: forwards the local copy of quat to vmathQNeg */
+    vmathQNeg( &negated, &quat );
+    return negated;
+}
+
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
+{
+    return vmathQDot(&quat0, &quat1); /* both operands arrive by value; their local copies are passed on by address */
+}
+
+static inline float vmathQNorm_V( VmathQuat quat )
+{
+    return vmathQNorm(&quat); /* quat arrives by value; its local copy is passed on by address */
+}
+
+static inline float vmathQLength_V( VmathQuat quat )
+{
+    return vmathQLength(&quat); /* quat arrives by value; its local copy is passed on by address */
+}
+
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
+{
+    VmathQuat normalized; /* by-value form: forwards the local copy of quat to vmathQNormalize */
+    vmathQNormalize( &normalized, &quat );
+    return normalized;
+}
+
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{
+    VmathQuat rotation; /* by-value form: forwards the local copies of both vectors to vmathQMakeRotationArc */
+    vmathQMakeRotationArc( &rotation, &unitVec0, &unitVec1 );
+    return rotation;
+}
+
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathQuat rotation; /* by-value form: forwards radians and the local copy of unitVec to vmathQMakeRotationAxis */
+    vmathQMakeRotationAxis( &rotation, radians, &unitVec );
+    return rotation;
+}
+
+static inline VmathQuat vmathQMakeRotationX_V( float radians )
+{
+    VmathQuat rotation; /* by-value form: filled in by vmathQMakeRotationX, then returned as a copy */
+    vmathQMakeRotationX( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathQuat vmathQMakeRotationY_V( float radians )
+{
+    VmathQuat rotation; /* by-value form: filled in by vmathQMakeRotationY, then returned as a copy */
+    vmathQMakeRotationY( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathQuat vmathQMakeRotationZ_V( float radians )
+{
+    VmathQuat rotation; /* by-value form: filled in by vmathQMakeRotationZ, then returned as a copy */
+    vmathQMakeRotationZ( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
+{
+    VmathQuat product; /* by-value form: forwards the local copies of both operands to vmathQMul, in the same order */
+    vmathQMul( &product, &quat0, &quat1 );
+    return product;
+}
+
+static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
+{
+    VmathVector3 rotated; /* by-value form: forwards the local copies of quat and vec to vmathQRotate */
+    vmathQRotate( &rotated, &quat, &vec );
+    return rotated;
+}
+
+static inline VmathQuat vmathQConj_V( VmathQuat quat )
+{
+    VmathQuat conjugate; /* by-value form: forwards the local copy of quat to vmathQConj */
+    vmathQConj( &conjugate, &quat );
+    return conjugate;
+}
+
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
+{
+    VmathQuat chosen; /* by-value form: forwards the local copies of both candidates and select1 to vmathQSelect */
+    vmathQSelect( &chosen, &quat0, &quat1, select1 );
+    return chosen;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint_V( VmathQuat quat )
+{
+    vmathQPrint(&quat); /* debug aid: quat arrives by value; its local copy is passed on by address */
+}
+
+static inline void vmathQPrints_V( VmathQuat quat, const char *name )
+{
+    vmathQPrints(&quat, name); /* debug aid: local copy of quat passed by address, name forwarded unchanged */
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos.h
index 80d974f99..f0b0f437d 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos.h
@@ -1,971 +1,971 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_C_H
-#define _VECTORMATH_VEC_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathV3MakeXAxis( VmathVector3 *result )
-{
-    vmathV3MakeFromElems( result, 1.0f, 0.0f, 0.0f );
-}
-
-static inline void vmathV3MakeYAxis( VmathVector3 *result )
-{
-    vmathV3MakeFromElems( result, 0.0f, 1.0f, 0.0f );
-}
-
-static inline void vmathV3MakeZAxis( VmathVector3 *result )
-{
-    vmathV3MakeFromElems( result, 0.0f, 0.0f, 1.0f );
-}
-
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = vmathV3Dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    vmathV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
-    vmathV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
-    vmathV3Add( result, &tmpV3_0, &tmpV3_1 );
-}
-
-static inline void vmathV3SetX( VmathVector3 *result, float _x )
-{
-    result->x = _x;
-}
-
-static inline float vmathV3GetX( const VmathVector3 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathV3SetY( VmathVector3 *result, float _y )
-{
-    result->y = _y;
-}
-
-static inline float vmathV3GetY( const VmathVector3 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathV3SetZ( VmathVector3 *result, float _z )
-{
-    result->z = _z;
-}
-
-static inline float vmathV3GetZ( const VmathVector3 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec0->x + vec1->x );
-    result->y = ( vec0->y + vec1->y );
-    result->z = ( vec0->z + vec1->z );
-}
-
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec0->x - vec1->x );
-    result->y = ( vec0->y - vec1->y );
-    result->z = ( vec0->z - vec1->z );
-}
-
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
-{
-    result->x = ( vec->x + pnt1->x );
-    result->y = ( vec->y + pnt1->y );
-    result->z = ( vec->z + pnt1->z );
-}
-
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->x = ( vec->x * scalar );
-    result->y = ( vec->y * scalar );
-    result->z = ( vec->z * scalar );
-}
-
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->x = ( vec->x / scalar );
-    result->y = ( vec->y / scalar );
-    result->z = ( vec->z / scalar );
-}
-
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = -vec->x;
-    result->y = -vec->y;
-    result->z = -vec->z;
-}
-
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec0->x * vec1->x );
-    result->y = ( vec0->y * vec1->y );
-    result->z = ( vec0->z * vec1->z );
-}
-
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec0->x / vec1->x );
-    result->y = ( vec0->y / vec1->y );
-    result->z = ( vec0->z / vec1->z );
-}
-
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = ( 1.0f / vec->x );
-    result->y = ( 1.0f / vec->y );
-    result->z = ( 1.0f / vec->z );
-}
-
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = sqrtf( vec->x );
-    result->y = sqrtf( vec->y );
-    result->z = sqrtf( vec->z );
-}
-
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = ( 1.0f / sqrtf( vec->x ) );
-    result->y = ( 1.0f / sqrtf( vec->y ) );
-    result->z = ( 1.0f / sqrtf( vec->z ) );
-}
-
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = fabsf( vec->x );
-    result->y = fabsf( vec->y );
-    result->z = fabsf( vec->z );
-}
-
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec1->x < 0.0f )? -fabsf( vec0->x ) : fabsf( vec0->x );
-    result->y = ( vec1->y < 0.0f )? -fabsf( vec0->y ) : fabsf( vec0->y );
-    result->z = ( vec1->z < 0.0f )? -fabsf( vec0->z ) : fabsf( vec0->z );
-}
-
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = (vec0->x > vec1->x)? vec0->x : vec1->x;
-    result->y = (vec0->y > vec1->y)? vec0->y : vec1->y;
-    result->z = (vec0->z > vec1->z)? vec0->z : vec1->z;
-}
-
-static inline float vmathV3MaxElem( const VmathVector3 *vec )
-{
-    float result;
-    result = (vec->x > vec->y)? vec->x : vec->y;
-    result = (vec->z > result)? vec->z : result;
-    return result;
-}
-
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = (vec0->x < vec1->x)? vec0->x : vec1->x;
-    result->y = (vec0->y < vec1->y)? vec0->y : vec1->y;
-    result->z = (vec0->z < vec1->z)? vec0->z : vec1->z;
-}
-
-static inline float vmathV3MinElem( const VmathVector3 *vec )
-{
-    float result;
-    result = (vec->x < vec->y)? vec->x : vec->y;
-    result = (vec->z < result)? vec->z : result;
-    return result;
-}
-
-static inline float vmathV3Sum( const VmathVector3 *vec )
-{
-    float result;
-    result = ( vec->x + vec->y );
-    result = ( result + vec->z );
-    return result;
-}
-
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    float result;
-    result = ( vec0->x * vec1->x );
-    result = ( result + ( vec0->y * vec1->y ) );
-    result = ( result + ( vec0->z * vec1->z ) );
-    return result;
-}
-
-static inline float vmathV3LengthSqr( const VmathVector3 *vec )
-{
-    float result;
-    result = ( vec->x * vec->x );
-    result = ( result + ( vec->y * vec->y ) );
-    result = ( result + ( vec->z * vec->z ) );
-    return result;
-}
-
-static inline float vmathV3Length( const VmathVector3 *vec )
-{
-    return sqrtf( vmathV3LengthSqr( vec ) );
-}
-
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = vmathV3LengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    result->x = ( vec->x * lenInv );
-    result->y = ( vec->y * lenInv );
-    result->z = ( vec->z * lenInv );
-}
-
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( vec0->y * vec1->z ) - ( vec0->z * vec1->y ) );
-    tmpY = ( ( vec0->z * vec1->x ) - ( vec0->x * vec1->z ) );
-    tmpZ = ( ( vec0->x * vec1->y ) - ( vec0->y * vec1->x ) );
-    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
-{
-    result->x = ( select1 )? vec1->x : vec0->x;
-    result->y = ( select1 )? vec1->y : vec0->y;
-    result->z = ( select1 )? vec1->z : vec0->z;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print( const VmathVector3 *vec )
-{
-    printf( "( %f %f %f )\n", vec->x, vec->y, vec->z );
-}
-
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
-{
-    printf( "%s: ( %f %f %f )\n", name, vec->x, vec->y, vec->z );
-}
-
-#endif
-
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
-{
-    vmathV4SetXYZ( result, xyz );
-    vmathV4SetW( result, _w );
-}
-
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = 0.0f;
-}
-
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-    result->w = 1.0f;
-}
-
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathV4MakeXAxis( VmathVector4 *result )
-{
-    vmathV4MakeFromElems( result, 1.0f, 0.0f, 0.0f, 0.0f );
-}
-
-static inline void vmathV4MakeYAxis( VmathVector4 *result )
-{
-    vmathV4MakeFromElems( result, 0.0f, 1.0f, 0.0f, 0.0f );
-}
-
-static inline void vmathV4MakeZAxis( VmathVector4 *result )
-{
-    vmathV4MakeFromElems( result, 0.0f, 0.0f, 1.0f, 0.0f );
-}
-
-static inline void vmathV4MakeWAxis( VmathVector4 *result )
-{
-    vmathV4MakeFromElems( result, 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    VmathVector4 tmpV4_0, tmpV4_1;
-    vmathV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
-{
-    VmathVector4 tmpV4_0, tmpV4_1;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = vmathV4Dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    vmathV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
-    vmathV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
-    vmathV4Add( result, &tmpV4_0, &tmpV4_1 );
-}
-
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
-{
-    vmathV3MakeFromElems( result, vec->x, vec->y, vec->z );
-}
-
-static inline void vmathV4SetX( VmathVector4 *result, float _x )
-{
-    result->x = _x;
-}
-
-static inline float vmathV4GetX( const VmathVector4 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathV4SetY( VmathVector4 *result, float _y )
-{
-    result->y = _y;
-}
-
-static inline float vmathV4GetY( const VmathVector4 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathV4SetZ( VmathVector4 *result, float _z )
-{
-    result->z = _z;
-}
-
-static inline float vmathV4GetZ( const VmathVector4 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathV4SetW( VmathVector4 *result, float _w )
-{
-    result->w = _w;
-}
-
-static inline float vmathV4GetW( const VmathVector4 *vec )
-{
-    return vec->w;
-}
-
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec0->x + vec1->x );
-    result->y = ( vec0->y + vec1->y );
-    result->z = ( vec0->z + vec1->z );
-    result->w = ( vec0->w + vec1->w );
-}
-
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec0->x - vec1->x );
-    result->y = ( vec0->y - vec1->y );
-    result->z = ( vec0->z - vec1->z );
-    result->w = ( vec0->w - vec1->w );
-}
-
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->x = ( vec->x * scalar );
-    result->y = ( vec->y * scalar );
-    result->z = ( vec->z * scalar );
-    result->w = ( vec->w * scalar );
-}
-
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->x = ( vec->x / scalar );
-    result->y = ( vec->y / scalar );
-    result->z = ( vec->z / scalar );
-    result->w = ( vec->w / scalar );
-}
-
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = -vec->x;
-    result->y = -vec->y;
-    result->z = -vec->z;
-    result->w = -vec->w;
-}
-
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec0->x * vec1->x );
-    result->y = ( vec0->y * vec1->y );
-    result->z = ( vec0->z * vec1->z );
-    result->w = ( vec0->w * vec1->w );
-}
-
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec0->x / vec1->x );
-    result->y = ( vec0->y / vec1->y );
-    result->z = ( vec0->z / vec1->z );
-    result->w = ( vec0->w / vec1->w );
-}
-
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = ( 1.0f / vec->x );
-    result->y = ( 1.0f / vec->y );
-    result->z = ( 1.0f / vec->z );
-    result->w = ( 1.0f / vec->w );
-}
-
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = sqrtf( vec->x );
-    result->y = sqrtf( vec->y );
-    result->z = sqrtf( vec->z );
-    result->w = sqrtf( vec->w );
-}
-
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = ( 1.0f / sqrtf( vec->x ) );
-    result->y = ( 1.0f / sqrtf( vec->y ) );
-    result->z = ( 1.0f / sqrtf( vec->z ) );
-    result->w = ( 1.0f / sqrtf( vec->w ) );
-}
-
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = fabsf( vec->x );
-    result->y = fabsf( vec->y );
-    result->z = fabsf( vec->z );
-    result->w = fabsf( vec->w );
-}
-
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec1->x < 0.0f )? -fabsf( vec0->x ) : fabsf( vec0->x );
-    result->y = ( vec1->y < 0.0f )? -fabsf( vec0->y ) : fabsf( vec0->y );
-    result->z = ( vec1->z < 0.0f )? -fabsf( vec0->z ) : fabsf( vec0->z );
-    result->w = ( vec1->w < 0.0f )? -fabsf( vec0->w ) : fabsf( vec0->w );
-}
-
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = (vec0->x > vec1->x)? vec0->x : vec1->x;
-    result->y = (vec0->y > vec1->y)? vec0->y : vec1->y;
-    result->z = (vec0->z > vec1->z)? vec0->z : vec1->z;
-    result->w = (vec0->w > vec1->w)? vec0->w : vec1->w;
-}
-
-static inline float vmathV4MaxElem( const VmathVector4 *vec )
-{
-    float result;
-    result = (vec->x > vec->y)? vec->x : vec->y;
-    result = (vec->z > result)? vec->z : result;
-    result = (vec->w > result)? vec->w : result;
-    return result;
-}
-
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = (vec0->x < vec1->x)? vec0->x : vec1->x;
-    result->y = (vec0->y < vec1->y)? vec0->y : vec1->y;
-    result->z = (vec0->z < vec1->z)? vec0->z : vec1->z;
-    result->w = (vec0->w < vec1->w)? vec0->w : vec1->w;
-}
-
-static inline float vmathV4MinElem( const VmathVector4 *vec )
-{
-    float result;
-    result = (vec->x < vec->y)? vec->x : vec->y;
-    result = (vec->z < result)? vec->z : result;
-    result = (vec->w < result)? vec->w : result;
-    return result;
-}
-
-static inline float vmathV4Sum( const VmathVector4 *vec )
-{
-    float result;
-    result = ( vec->x + vec->y );
-    result = ( result + vec->z );
-    result = ( result + vec->w );
-    return result;
-}
-
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    float result;
-    result = ( vec0->x * vec1->x );
-    result = ( result + ( vec0->y * vec1->y ) );
-    result = ( result + ( vec0->z * vec1->z ) );
-    result = ( result + ( vec0->w * vec1->w ) );
-    return result;
-}
-
-static inline float vmathV4LengthSqr( const VmathVector4 *vec )
-{
-    float result;
-    result = ( vec->x * vec->x );
-    result = ( result + ( vec->y * vec->y ) );
-    result = ( result + ( vec->z * vec->z ) );
-    result = ( result + ( vec->w * vec->w ) );
-    return result;
-}
-
-static inline float vmathV4Length( const VmathVector4 *vec )
-{
-    return sqrtf( vmathV4LengthSqr( vec ) );
-}
-
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = vmathV4LengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    result->x = ( vec->x * lenInv );
-    result->y = ( vec->y * lenInv );
-    result->z = ( vec->z * lenInv );
-    result->w = ( vec->w * lenInv );
-}
-
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
-{
-    result->x = ( select1 )? vec1->x : vec0->x;
-    result->y = ( select1 )? vec1->y : vec0->y;
-    result->z = ( select1 )? vec1->z : vec0->z;
-    result->w = ( select1 )? vec1->w : vec0->w;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print( const VmathVector4 *vec )
-{
-    printf( "( %f %f %f %f )\n", vec->x, vec->y, vec->z, vec->w );
-}
-
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, vec->x, vec->y, vec->z, vec->w );
-}
-
-#endif
-
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline void vmathP3SetX( VmathPoint3 *result, float _x )
-{
-    result->x = _x;
-}
-
-static inline float vmathP3GetX( const VmathPoint3 *pnt )
-{
-    return pnt->x;
-}
-
-static inline void vmathP3SetY( VmathPoint3 *result, float _y )
-{
-    result->y = _y;
-}
-
-static inline float vmathP3GetY( const VmathPoint3 *pnt )
-{
-    return pnt->y;
-}
-
-static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
-{
-    result->z = _z;
-}
-
-static inline float vmathP3GetZ( const VmathPoint3 *pnt )
-{
-    return pnt->z;
-}
-
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
-{
-    return *(&pnt->x + idx);
-}
-
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = ( pnt0->x - pnt1->x );
-    result->y = ( pnt0->y - pnt1->y );
-    result->z = ( pnt0->z - pnt1->z );
-}
-
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->x = ( pnt->x + vec1->x );
-    result->y = ( pnt->y + vec1->y );
-    result->z = ( pnt->z + vec1->z );
-}
-
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->x = ( pnt->x - vec1->x );
-    result->y = ( pnt->y - vec1->y );
-    result->z = ( pnt->z - vec1->z );
-}
-
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = ( pnt0->x * pnt1->x );
-    result->y = ( pnt0->y * pnt1->y );
-    result->z = ( pnt0->z * pnt1->z );
-}
-
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = ( pnt0->x / pnt1->x );
-    result->y = ( pnt0->y / pnt1->y );
-    result->z = ( pnt0->z / pnt1->z );
-}
-
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = ( 1.0f / pnt->x );
-    result->y = ( 1.0f / pnt->y );
-    result->z = ( 1.0f / pnt->z );
-}
-
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = sqrtf( pnt->x );
-    result->y = sqrtf( pnt->y );
-    result->z = sqrtf( pnt->z );
-}
-
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = ( 1.0f / sqrtf( pnt->x ) );
-    result->y = ( 1.0f / sqrtf( pnt->y ) );
-    result->z = ( 1.0f / sqrtf( pnt->z ) );
-}
-
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = fabsf( pnt->x );
-    result->y = fabsf( pnt->y );
-    result->z = fabsf( pnt->z );
-}
-
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = ( pnt1->x < 0.0f )? -fabsf( pnt0->x ) : fabsf( pnt0->x );
-    result->y = ( pnt1->y < 0.0f )? -fabsf( pnt0->y ) : fabsf( pnt0->y );
-    result->z = ( pnt1->z < 0.0f )? -fabsf( pnt0->z ) : fabsf( pnt0->z );
-}
-
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = (pnt0->x > pnt1->x)? pnt0->x : pnt1->x;
-    result->y = (pnt0->y > pnt1->y)? pnt0->y : pnt1->y;
-    result->z = (pnt0->z > pnt1->z)? pnt0->z : pnt1->z;
-}
-
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
-{
-    float result;
-    result = (pnt->x > pnt->y)? pnt->x : pnt->y;
-    result = (pnt->z > result)? pnt->z : result;
-    return result;
-}
-
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = (pnt0->x < pnt1->x)? pnt0->x : pnt1->x;
-    result->y = (pnt0->y < pnt1->y)? pnt0->y : pnt1->y;
-    result->z = (pnt0->z < pnt1->z)? pnt0->z : pnt1->z;
-}
-
-static inline float vmathP3MinElem( const VmathPoint3 *pnt )
-{
-    float result;
-    result = (pnt->x < pnt->y)? pnt->x : pnt->y;
-    result = (pnt->z < result)? pnt->z : result;
-    return result;
-}
-
-static inline float vmathP3Sum( const VmathPoint3 *pnt )
-{
-    float result;
-    result = ( pnt->x + pnt->y );
-    result = ( result + pnt->z );
-    return result;
-}
-
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
-{
-    float result;
-    result = ( pnt->x * unitVec->x );
-    result = ( result + ( pnt->y * unitVec->y ) );
-    result = ( result + ( pnt->z * unitVec->z ) );
-    return result;
-}
-
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
-{
-    result->x = ( select1 )? pnt1->x : pnt0->x;
-    result->y = ( select1 )? pnt1->y : pnt0->y;
-    result->z = ( select1 )? pnt1->z : pnt0->z;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print( const VmathPoint3 *pnt )
-{
-    printf( "( %f %f %f )\n", pnt->x, pnt->y, pnt->z );
-}
-
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
-{
-    printf( "%s: ( %f %f %f )\n", name, pnt->x, pnt->y, pnt->z );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_C_H
+#define _VECTORMATH_VEC_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
+{
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathV3MakeXAxis( VmathVector3 *result )
+{
+    vmathV3MakeFromElems( result, 1.0f, 0.0f, 0.0f );
+}
+
+static inline void vmathV3MakeYAxis( VmathVector3 *result )
+{
+    vmathV3MakeFromElems( result, 0.0f, 1.0f, 0.0f );
+}
+
+static inline void vmathV3MakeZAxis( VmathVector3 *result )
+{
+    vmathV3MakeFromElems( result, 0.0f, 0.0f, 1.0f );
+}
+
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    VmathVector3 tmpV3_0, tmpV3_1;
+    vmathV3Sub( &tmpV3_0, vec1, vec0 );
+    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathV3Add( result, vec0, &tmpV3_1 );
+}
+
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{
+    VmathVector3 tmpV3_0, tmpV3_1;
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = vmathV3Dot( unitVec0, unitVec1 );
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        angle = acosf( cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) );
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    vmathV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
+    vmathV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
+    vmathV3Add( result, &tmpV3_0, &tmpV3_1 );
+}
+
+static inline void vmathV3SetX( VmathVector3 *result, float _x )
+{
+    result->x = _x;
+}
+
+static inline float vmathV3GetX( const VmathVector3 *vec )
+{
+    return vec->x;
+}
+
+static inline void vmathV3SetY( VmathVector3 *result, float _y )
+{
+    result->y = _y;
+}
+
+static inline float vmathV3GetY( const VmathVector3 *vec )
+{
+    return vec->y;
+}
+
+static inline void vmathV3SetZ( VmathVector3 *result, float _z )
+{
+    result->z = _z;
+}
+
+static inline float vmathV3GetZ( const VmathVector3 *vec )
+{
+    return vec->z;
+}
+
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
+{
+    *(&result->x + idx) = value;
+}
+
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
+{
+    return *(&vec->x + idx);
+}
+
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec0->x + vec1->x );
+    result->y = ( vec0->y + vec1->y );
+    result->z = ( vec0->z + vec1->z );
+}
+
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec0->x - vec1->x );
+    result->y = ( vec0->y - vec1->y );
+    result->z = ( vec0->z - vec1->z );
+}
+
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
+{
+    result->x = ( vec->x + pnt1->x );
+    result->y = ( vec->y + pnt1->y );
+    result->z = ( vec->z + pnt1->z );
+}
+
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{
+    result->x = ( vec->x * scalar );
+    result->y = ( vec->y * scalar );
+    result->z = ( vec->z * scalar );
+}
+
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{
+    result->x = ( vec->x / scalar );
+    result->y = ( vec->y / scalar );
+    result->z = ( vec->z / scalar );
+}
+
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = -vec->x;
+    result->y = -vec->y;
+    result->z = -vec->z;
+}
+
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec0->x * vec1->x );
+    result->y = ( vec0->y * vec1->y );
+    result->z = ( vec0->z * vec1->z );
+}
+
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec0->x / vec1->x );
+    result->y = ( vec0->y / vec1->y );
+    result->z = ( vec0->z / vec1->z );
+}
+
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = ( 1.0f / vec->x );
+    result->y = ( 1.0f / vec->y );
+    result->z = ( 1.0f / vec->z );
+}
+
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = sqrtf( vec->x );
+    result->y = sqrtf( vec->y );
+    result->z = sqrtf( vec->z );
+}
+
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = ( 1.0f / sqrtf( vec->x ) );
+    result->y = ( 1.0f / sqrtf( vec->y ) );
+    result->z = ( 1.0f / sqrtf( vec->z ) );
+}
+
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = fabsf( vec->x );
+    result->y = fabsf( vec->y );
+    result->z = fabsf( vec->z );
+}
+
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec1->x < 0.0f )? -fabsf( vec0->x ) : fabsf( vec0->x );
+    result->y = ( vec1->y < 0.0f )? -fabsf( vec0->y ) : fabsf( vec0->y );
+    result->z = ( vec1->z < 0.0f )? -fabsf( vec0->z ) : fabsf( vec0->z );
+}
+
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = (vec0->x > vec1->x)? vec0->x : vec1->x;
+    result->y = (vec0->y > vec1->y)? vec0->y : vec1->y;
+    result->z = (vec0->z > vec1->z)? vec0->z : vec1->z;
+}
+
+static inline float vmathV3MaxElem( const VmathVector3 *vec )
+{
+    float result;
+    result = (vec->x > vec->y)? vec->x : vec->y;
+    result = (vec->z > result)? vec->z : result;
+    return result;
+}
+
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = (vec0->x < vec1->x)? vec0->x : vec1->x;
+    result->y = (vec0->y < vec1->y)? vec0->y : vec1->y;
+    result->z = (vec0->z < vec1->z)? vec0->z : vec1->z;
+}
+
+static inline float vmathV3MinElem( const VmathVector3 *vec )
+{
+    float result;
+    result = (vec->x < vec->y)? vec->x : vec->y;
+    result = (vec->z < result)? vec->z : result;
+    return result;
+}
+
+static inline float vmathV3Sum( const VmathVector3 *vec )
+{
+    float result;
+    result = ( vec->x + vec->y );
+    result = ( result + vec->z );
+    return result;
+}
+
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    float result;
+    result = ( vec0->x * vec1->x );
+    result = ( result + ( vec0->y * vec1->y ) );
+    result = ( result + ( vec0->z * vec1->z ) );
+    return result;
+}
+
+static inline float vmathV3LengthSqr( const VmathVector3 *vec )
+{
+    float result;
+    result = ( vec->x * vec->x );
+    result = ( result + ( vec->y * vec->y ) );
+    result = ( result + ( vec->z * vec->z ) );
+    return result;
+}
+
+static inline float vmathV3Length( const VmathVector3 *vec )
+{
+    return sqrtf( vmathV3LengthSqr( vec ) );
+}
+
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
+{
+    float lenSqr, lenInv;
+    lenSqr = vmathV3LengthSqr( vec );
+    lenInv = ( 1.0f / sqrtf( lenSqr ) );
+    result->x = ( vec->x * lenInv );
+    result->y = ( vec->y * lenInv );
+    result->z = ( vec->z * lenInv );
+}
+
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    float tmpX, tmpY, tmpZ;
+    tmpX = ( ( vec0->y * vec1->z ) - ( vec0->z * vec1->y ) );
+    tmpY = ( ( vec0->z * vec1->x ) - ( vec0->x * vec1->z ) );
+    tmpZ = ( ( vec0->x * vec1->y ) - ( vec0->y * vec1->x ) );
+    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
+{
+    result->x = ( select1 )? vec1->x : vec0->x;
+    result->y = ( select1 )? vec1->y : vec0->y;
+    result->z = ( select1 )? vec1->z : vec0->z;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print( const VmathVector3 *vec )
+{
+    printf( "( %f %f %f )\n", vec->x, vec->y, vec->z );
+}
+
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
+{
+    printf( "%s: ( %f %f %f )\n", name, vec->x, vec->y, vec->z );
+}
+
+#endif
+
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
+{
+    vmathV4SetXYZ( result, xyz );
+    vmathV4SetW( result, _w );
+}
+
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = 0.0f;
+}
+
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
+{
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+    result->w = 1.0f;
+}
+
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
+{
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+static inline void vmathV4MakeXAxis( VmathVector4 *result )
+{
+    vmathV4MakeFromElems( result, 1.0f, 0.0f, 0.0f, 0.0f );
+}
+
+static inline void vmathV4MakeYAxis( VmathVector4 *result )
+{
+    vmathV4MakeFromElems( result, 0.0f, 1.0f, 0.0f, 0.0f );
+}
+
+static inline void vmathV4MakeZAxis( VmathVector4 *result )
+{
+    vmathV4MakeFromElems( result, 0.0f, 0.0f, 1.0f, 0.0f );
+}
+
+static inline void vmathV4MakeWAxis( VmathVector4 *result )
+{
+    vmathV4MakeFromElems( result, 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* Linear interpolation: result = vec0 + ( vec1 - vec0 ) * t; t is not clamped. */
+    VmathVector4 span, offset;
+    vmathV4Sub( &span, vec1, vec0 );        /* full step from vec0 to vec1 */
+    vmathV4ScalarMul( &offset, &span, t );  /* fraction t of that step */
+    vmathV4Add( result, vec0, &offset );
+}
+
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
+{ /* Spherical interpolation: result = scale0 * unitVec0 + scale1 * unitVec1; inputs are presumably unit length (per the parameter names). */
+    VmathVector4 tmpV4_0, tmpV4_1;
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = vmathV4Dot( unitVec0, unitVec1 );
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) { /* below the tolerance: use sine-weighted coefficients */
+        angle = acosf( cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) ); /* NOTE(review): no guard for cosAngle near -1, where sinf( angle ) approaches 0 */
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else { /* cosAngle at or above the tolerance: plain linear weights, result is not renormalized */
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    vmathV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
+    vmathV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
+    vmathV4Add( result, &tmpV4_0, &tmpV4_1 );
+}
+
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
+{ /* Copy x, y, z from *vec; the w component of *result is left untouched. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
+{ /* Extract x, y, z of *vec into a 3-D vector; w is not read. */
+    vmathV3MakeFromElems( result, vec->x, vec->y, vec->z );
+}
+
+static inline void vmathV4SetX( VmathVector4 *result, float _x )
+{ /* Overwrite only the x component of *result. */
+    result->x = _x;
+}
+
+static inline float vmathV4GetX( const VmathVector4 *vec )
+{ /* Return the x component of *vec. */
+    return vec->x;
+}
+
+static inline void vmathV4SetY( VmathVector4 *result, float _y )
+{ /* Overwrite only the y component of *result. */
+    result->y = _y;
+}
+
+static inline float vmathV4GetY( const VmathVector4 *vec )
+{ /* Return the y component of *vec. */
+    return vec->y;
+}
+
+static inline void vmathV4SetZ( VmathVector4 *result, float _z )
+{ /* Overwrite only the z component of *result. */
+    result->z = _z;
+}
+
+static inline float vmathV4GetZ( const VmathVector4 *vec )
+{ /* Return the z component of *vec. */
+    return vec->z;
+}
+
+static inline void vmathV4SetW( VmathVector4 *result, float _w )
+{ /* Overwrite only the w component of *result. */
+    result->w = _w;
+}
+
+static inline float vmathV4GetW( const VmathVector4 *vec )
+{ /* Return the w component of *vec. */
+    return vec->w;
+}
+
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
+{ /* Store value at float offset idx from &result->x; idx is not range-checked (presumably 0..3 maps to x..w -- verify member layout). */
+    *(&result->x + idx) = value;
+}
+
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
+{ /* Load the float at offset idx from &vec->x; idx is not range-checked (presumably 0..3 maps to x..w -- verify member layout). */
+    return *(&vec->x + idx);
+}
+
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* Component-wise sum: result = vec0 + vec1. */
+    vmathV4MakeFromElems( result, ( vec0->x + vec1->x ),
+                                  ( vec0->y + vec1->y ),
+                                  ( vec0->z + vec1->z ),
+                                  ( vec0->w + vec1->w ) );
+}
+
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* Component-wise difference: result = vec0 - vec1. */
+    vmathV4MakeFromElems( result, ( vec0->x - vec1->x ),
+                                  ( vec0->y - vec1->y ),
+                                  ( vec0->z - vec1->z ),
+                                  ( vec0->w - vec1->w ) );
+}
+
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{ /* Multiply every component of *vec by scalar. */
+    vmathV4MakeFromElems( result, ( vec->x * scalar ),
+                                  ( vec->y * scalar ),
+                                  ( vec->z * scalar ),
+                                  ( vec->w * scalar ) );
+}
+
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{ /* Divide every component of *vec by scalar; a zero scalar is not checked for. */
+    vmathV4MakeFromElems( result, ( vec->x / scalar ),
+                                  ( vec->y / scalar ),
+                                  ( vec->z / scalar ),
+                                  ( vec->w / scalar ) );
+}
+
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
+{ /* Component-wise negation: result = -vec. */
+    vmathV4MakeFromElems( result, -vec->x,
+                                  -vec->y,
+                                  -vec->z,
+                                  -vec->w );
+}
+
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* Component-wise product of vec0 and vec1. */
+    vmathV4MakeFromElems( result, ( vec0->x * vec1->x ),
+                                  ( vec0->y * vec1->y ),
+                                  ( vec0->z * vec1->z ),
+                                  ( vec0->w * vec1->w ) );
+}
+
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* Component-wise quotient vec0 / vec1; zero divisors are not checked for. */
+    vmathV4MakeFromElems( result, ( vec0->x / vec1->x ),
+                                  ( vec0->y / vec1->y ),
+                                  ( vec0->z / vec1->z ),
+                                  ( vec0->w / vec1->w ) );
+}
+
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{ /* Component-wise reciprocal 1 / vec; zero components are not checked for. */
+    vmathV4MakeFromElems( result, ( 1.0f / vec->x ),
+                                  ( 1.0f / vec->y ),
+                                  ( 1.0f / vec->z ),
+                                  ( 1.0f / vec->w ) );
+}
+
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{ /* Component-wise sqrtf of *vec. */
+    vmathV4MakeFromElems( result, sqrtf( vec->x ),
+                                  sqrtf( vec->y ),
+                                  sqrtf( vec->z ),
+                                  sqrtf( vec->w ) );
+}
+
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{ /* Component-wise reciprocal square root, computed as 1 / sqrtf. */
+    vmathV4MakeFromElems( result, ( 1.0f / sqrtf( vec->x ) ),
+                                  ( 1.0f / sqrtf( vec->y ) ),
+                                  ( 1.0f / sqrtf( vec->z ) ),
+                                  ( 1.0f / sqrtf( vec->w ) ) );
+}
+
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{ /* Component-wise absolute value via fabsf. */
+    vmathV4MakeFromElems( result, fabsf( vec->x ),
+                                  fabsf( vec->y ),
+                                  fabsf( vec->z ),
+                                  fabsf( vec->w ) );
+}
+
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* Per element: magnitude of vec0 with the sign of vec1. copysignf reads the sign bit, so a -0.0f in vec1 gives a negative result (a "< 0.0f" test would not). */
+    result->x = copysignf( vec0->x, vec1->x );
+    result->y = copysignf( vec0->y, vec1->y );
+    result->z = copysignf( vec0->z, vec1->z );
+    result->w = copysignf( vec0->w, vec1->w );
+}
+
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* Component-wise maximum; vec1's component is taken unless vec0's is strictly greater. */
+    vmathV4MakeFromElems( result, ( vec0->x > vec1->x )? vec0->x : vec1->x,
+                                  ( vec0->y > vec1->y )? vec0->y : vec1->y,
+                                  ( vec0->z > vec1->z )? vec0->z : vec1->z,
+                                  ( vec0->w > vec1->w )? vec0->w : vec1->w );
+}
+
+static inline float vmathV4MaxElem( const VmathVector4 *vec )
+{ /* Largest of the four components, found with strict '>' comparisons. */
+    float best = vec->y;
+    if ( vec->x > vec->y ) best = vec->x;
+    if ( vec->z > best ) best = vec->z;
+    if ( vec->w > best ) best = vec->w;
+    return best;
+}
+
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* Component-wise minimum; vec1's component is taken unless vec0's is strictly smaller. */
+    vmathV4MakeFromElems( result, ( vec0->x < vec1->x )? vec0->x : vec1->x,
+                                  ( vec0->y < vec1->y )? vec0->y : vec1->y,
+                                  ( vec0->z < vec1->z )? vec0->z : vec1->z,
+                                  ( vec0->w < vec1->w )? vec0->w : vec1->w );
+}
+
+static inline float vmathV4MinElem( const VmathVector4 *vec )
+{ /* Smallest of the four components, found with strict '<' comparisons. */
+    float best = vec->y;
+    if ( vec->x < vec->y ) best = vec->x;
+    if ( vec->z < best ) best = vec->z;
+    if ( vec->w < best ) best = vec->w;
+    return best;
+}
+
+static inline float vmathV4Sum( const VmathVector4 *vec )
+{ /* Sum of the four components, accumulated in x, y, z, w order. */
+    float total = vec->x;
+    total += vec->y;
+    total += vec->z;
+    total += vec->w;
+    return total;
+}
+
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    /* Four-term dot product, accumulated in x, y, z, w order. */
+    float acc = ( vec0->x * vec1->x );
+    acc += ( vec0->y * vec1->y );
+    acc += ( vec0->z * vec1->z );
+    acc += ( vec0->w * vec1->w );
+    return acc;
+}
+
+static inline float vmathV4LengthSqr( const VmathVector4 *vec )
+{
+    /*
+     * Squared Euclidean length of *vec: the dot product of the vector
+     * with itself, i.e. x*x + y*y + z*z + w*w summed in that order.
+     * No square root is taken.
+     */
+    return vmathV4Dot( vec, vec );
+}
+
+static inline float vmathV4Length( const VmathVector4 *vec )
+{ /* Euclidean length: square root of vmathV4LengthSqr( vec ). */
+    return sqrtf( vmathV4LengthSqr( vec ) );
+}
+
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
+{
+    /* Scale *vec by the reciprocal of its length.  A zero-length input is
+     * not guarded against: the reciprocal is then a division by zero. */
+    const float invLen = ( 1.0f / sqrtf( vmathV4LengthSqr( vec ) ) );
+    vmathV4MakeFromElems( result, ( vec->x * invLen ),
+                                  ( vec->y * invLen ),
+                                  ( vec->z * invLen ),
+                                  ( vec->w * invLen ) );
+}
+
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
+{
+    /* Whole-vector choice: a nonzero select1 copies vec1, zero copies
+     * vec0.  All four components come from the same source. */
+    const VmathVector4 *chosen = ( select1 )? vec1 : vec0;
+    vmathV4MakeFromElems( result, chosen->x, chosen->y, chosen->z, chosen->w );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print( const VmathVector4 *vec )
+{ /* Debug helper: print the four components on one line to stdout. */
+    printf( "( %f %f %f %f )\n", vec->x, vec->y, vec->z, vec->w );
+}
+
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
+{ /* Debug helper: print name, a colon, then the four components on one line to stdout. */
+    printf( "%s: ( %f %f %f %f )\n", name, vec->x, vec->y, vec->z, vec->w );
+}
+
+#endif
+
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
+{ /* Copy x, y, z of *pnt into *result, member by member. */
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
+{ /* Fill *result component by component from the three scalars _x, _y, _z. */
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
+{
+    /* Reinterpret a 3-D vector as a point: x, y, z are copied
+     * unchanged. */
+    vmathP3MakeFromElems( result, vec->x, vec->y, vec->z );
+}
+
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
+{
+    /* Broadcast one scalar into all three coordinates of
+     * *result. */
+    vmathP3MakeFromElems( result, scalar, scalar, scalar );
+}
+
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Linear interpolation between points: result = pnt0 + ( pnt1 - pnt0 ) * t; t is not clamped. */
+    VmathVector3 span, offset;
+    vmathP3Sub( &span, pnt1, pnt0 );        /* displacement from pnt0 to pnt1 */
+    vmathV3ScalarMul( &offset, &span, t );  /* fraction t of that displacement */
+    vmathP3AddV3( result, pnt0, &offset );
+}
+
+static inline void vmathP3SetX( VmathPoint3 *result, float _x )
+{ /* Overwrite only the x coordinate of *result. */
+    result->x = _x;
+}
+
+static inline float vmathP3GetX( const VmathPoint3 *pnt )
+{ /* Return the x coordinate of *pnt. */
+    return pnt->x;
+}
+
+static inline void vmathP3SetY( VmathPoint3 *result, float _y )
+{ /* Overwrite only the y coordinate of *result. */
+    result->y = _y;
+}
+
+static inline float vmathP3GetY( const VmathPoint3 *pnt )
+{ /* Return the y coordinate of *pnt. */
+    return pnt->y;
+}
+
+static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
+{ /* Overwrite only the z coordinate of *result. */
+    result->z = _z;
+}
+
+static inline float vmathP3GetZ( const VmathPoint3 *pnt )
+{ /* Return the z coordinate of *pnt. */
+    return pnt->z;
+}
+
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
+{ /* Store value at float offset idx from &result->x; idx is not range-checked (presumably 0..2 maps to x..z -- verify member layout). */
+    *(&result->x + idx) = value;
+}
+
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
+{ /* Load the float at offset idx from &pnt->x; idx is not range-checked (presumably 0..2 maps to x..z -- verify member layout). */
+    return *(&pnt->x + idx);
+}
+
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Displacement pnt0 - pnt1, produced as a vector rather than a point. */
+    vmathV3MakeFromElems( result, ( pnt0->x - pnt1->x ),
+                                  ( pnt0->y - pnt1->y ),
+                                  ( pnt0->z - pnt1->z ) );
+}
+
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{ /* Translate a point by a vector: result = pnt + vec1. */
+    vmathP3MakeFromElems( result, ( pnt->x + vec1->x ),
+                                  ( pnt->y + vec1->y ),
+                                  ( pnt->z + vec1->z ) );
+}
+
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{ /* Translate a point against a vector: result = pnt - vec1. */
+    vmathP3MakeFromElems( result, ( pnt->x - vec1->x ),
+                                  ( pnt->y - vec1->y ),
+                                  ( pnt->z - vec1->z ) );
+}
+
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Coordinate-wise product of pnt0 and pnt1. */
+    vmathP3MakeFromElems( result, ( pnt0->x * pnt1->x ),
+                                  ( pnt0->y * pnt1->y ),
+                                  ( pnt0->z * pnt1->z ) );
+}
+
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Coordinate-wise quotient pnt0 / pnt1; zero divisors are not checked for. */
+    vmathP3MakeFromElems( result, ( pnt0->x / pnt1->x ),
+                                  ( pnt0->y / pnt1->y ),
+                                  ( pnt0->z / pnt1->z ) );
+}
+
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{ /* Coordinate-wise reciprocal 1 / pnt; zero coordinates are not checked for. */
+    vmathP3MakeFromElems( result, ( 1.0f / pnt->x ),
+                                  ( 1.0f / pnt->y ),
+                                  ( 1.0f / pnt->z ) );
+}
+
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{ /* Coordinate-wise sqrtf of *pnt. */
+    vmathP3MakeFromElems( result, sqrtf( pnt->x ),
+                                  sqrtf( pnt->y ),
+                                  sqrtf( pnt->z ) );
+}
+
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{ /* Coordinate-wise reciprocal square root, computed as 1 / sqrtf. */
+    vmathP3MakeFromElems( result, ( 1.0f / sqrtf( pnt->x ) ),
+                                  ( 1.0f / sqrtf( pnt->y ) ),
+                                  ( 1.0f / sqrtf( pnt->z ) ) );
+}
+
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{ /* Coordinate-wise absolute value via fabsf. */
+    vmathP3MakeFromElems( result, fabsf( pnt->x ),
+                                  fabsf( pnt->y ),
+                                  fabsf( pnt->z ) );
+}
+
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Per coordinate: magnitude of pnt0 with the sign of pnt1. copysignf reads the sign bit, so a -0.0f in pnt1 gives a negative result (a "< 0.0f" test would not). */
+    result->x = copysignf( pnt0->x, pnt1->x );
+    result->y = copysignf( pnt0->y, pnt1->y );
+    result->z = copysignf( pnt0->z, pnt1->z );
+}
+
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Coordinate-wise maximum; pnt1's coordinate is taken unless pnt0's is strictly greater. */
+    vmathP3MakeFromElems( result, ( pnt0->x > pnt1->x )? pnt0->x : pnt1->x,
+                                  ( pnt0->y > pnt1->y )? pnt0->y : pnt1->y,
+                                  ( pnt0->z > pnt1->z )? pnt0->z : pnt1->z );
+}
+
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
+{ /* Largest of the three coordinates, found with strict '>' comparisons. */
+    float best = pnt->y;
+    if ( pnt->x > pnt->y ) best = pnt->x;
+    if ( pnt->z > best ) best = pnt->z;
+    return best;
+}
+
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Coordinate-wise minimum; pnt1's coordinate is taken unless pnt0's is strictly smaller. */
+    vmathP3MakeFromElems( result, ( pnt0->x < pnt1->x )? pnt0->x : pnt1->x,
+                                  ( pnt0->y < pnt1->y )? pnt0->y : pnt1->y,
+                                  ( pnt0->z < pnt1->z )? pnt0->z : pnt1->z );
+}
+
+static inline float vmathP3MinElem( const VmathPoint3 *pnt )
+{ /* Smallest of the three coordinates, found with strict '<' comparisons. */
+    float best = pnt->y;
+    if ( pnt->x < pnt->y ) best = pnt->x;
+    if ( pnt->z < best ) best = pnt->z;
+    return best;
+}
+
+static inline float vmathP3Sum( const VmathPoint3 *pnt )
+{ /* Sum of the three coordinates, accumulated in x, y, z order. */
+    float total = pnt->x;
+    total += pnt->y;
+    total += pnt->z;
+    return total;
+}
+
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
+{ /* Uniform scale: every coordinate of *pnt multiplied by scaleVal. */
+    vmathP3MakeFromElems( result, ( pnt->x * scaleVal ),
+                                  ( pnt->y * scaleVal ),
+                                  ( pnt->z * scaleVal ) );
+}
+
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
+{ /* Per-axis scale: each coordinate of *pnt multiplied by the matching component of *scaleVec. */
+    vmathP3MakeFromElems( result, ( pnt->x * scaleVec->x ),
+                                  ( pnt->y * scaleVec->y ),
+                                  ( pnt->z * scaleVec->z ) );
+}
+
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
+{
+    /* Dot product of the point's coordinates with unitVec, in x, y, z order. */
+    float acc = ( pnt->x * unitVec->x );
+    acc += ( pnt->y * unitVec->y );
+    acc += ( pnt->z * unitVec->z );
+    return acc;
+}
+
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
+{ /* Squared distance from the origin: squared length of the point viewed as a vector. */
+    VmathVector3 fromOrigin;
+    vmathV3MakeFromP3( &fromOrigin, pnt );
+    return vmathV3LengthSqr( &fromOrigin );
+}
+
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
+{ /* Distance from the origin: length of the point viewed as a vector. */
+    VmathVector3 fromOrigin;
+    vmathV3MakeFromP3( &fromOrigin, pnt );
+    return vmathV3Length( &fromOrigin );
+}
+
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Squared distance between two points: squared length of pnt1 - pnt0. */
+    VmathVector3 delta;
+    vmathP3Sub( &delta, pnt1, pnt0 );
+    return vmathV3LengthSqr( &delta );
+}
+
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{ /* Distance between two points: length of pnt1 - pnt0. */
+    VmathVector3 delta;
+    vmathP3Sub( &delta, pnt1, pnt0 );
+    return vmathV3Length( &delta );
+}
+
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
+{
+    /* Whole-point choice: a nonzero select1 copies pnt1, zero copies pnt0. */
+    const VmathPoint3 *chosen = ( select1 )? pnt1 : pnt0;
+    vmathP3MakeFromElems( result, chosen->x, chosen->y, chosen->z );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print( const VmathPoint3 *pnt )
+{ /* Debug helper: print the three coordinates on one line to stdout. */
+    printf( "( %f %f %f )\n", pnt->x, pnt->y, pnt->z );
+}
+
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
+{ /* Debug helper: print name, a colon, then the three coordinates on one line to stdout. */
+    printf( "%s: ( %f %f %f )\n", name, pnt->x, pnt->y, pnt->z );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos_v.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos_v.h
index de8adb5ad..75c49370d 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos_v.h
@@ -1,848 +1,848 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_V_C_H
-#define _VECTORMATH_VEC_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathVector3 result;
-    vmathV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector3 result;
-    vmathV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
-{
-    VmathVector3 result;
-    vmathV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeXAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeYAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeZAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathVector3 result;
-    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
-{
-    vmathV3SetX(result, _x);
-}
-
-static inline float vmathV3GetX_V( VmathVector3 vec )
-{
-    return vmathV3GetX(&vec);
-}
-
-static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
-{
-    vmathV3SetY(result, _y);
-}
-
-static inline float vmathV3GetY_V( VmathVector3 vec )
-{
-    return vmathV3GetY(&vec);
-}
-
-static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
-{
-    vmathV3SetZ(result, _z);
-}
-
-static inline float vmathV3GetZ_V( VmathVector3 vec )
-{
-    return vmathV3GetZ(&vec);
-}
-
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
-{
-    vmathV3SetElem(result, idx, value);
-}
-
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
-{
-    return vmathV3GetElem(&vec, idx);
-}
-
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MaxElem_V( VmathVector3 vec )
-{
-    return vmathV3MaxElem(&vec);
-}
-
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MinElem_V( VmathVector3 vec )
-{
-    return vmathV3MinElem(&vec);
-}
-
-static inline float vmathV3Sum_V( VmathVector3 vec )
-{
-    return vmathV3Sum(&vec);
-}
-
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    return vmathV3Dot(&vec0, &vec1);
-}
-
-static inline float vmathV3LengthSqr_V( VmathVector3 vec )
-{
-    return vmathV3LengthSqr(&vec);
-}
-
-static inline float vmathV3Length_V( VmathVector3 vec )
-{
-    return vmathV3Length(&vec);
-}
-
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
-{
-    VmathVector3 result;
-    vmathV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print_V( VmathVector3 vec )
-{
-    vmathV3Print(&vec);
-}
-
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
-{
-    vmathV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
-{
-    VmathVector4 result;
-    vmathV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
-{
-    VmathVector4 result;
-    vmathV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeXAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeYAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeZAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeWAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
-{
-    VmathVector4 result;
-    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
-{
-    vmathV4SetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
-{
-    VmathVector3 result;
-    vmathV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
-{
-    vmathV4SetX(result, _x);
-}
-
-static inline float vmathV4GetX_V( VmathVector4 vec )
-{
-    return vmathV4GetX(&vec);
-}
-
-static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
-{
-    vmathV4SetY(result, _y);
-}
-
-static inline float vmathV4GetY_V( VmathVector4 vec )
-{
-    return vmathV4GetY(&vec);
-}
-
-static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
-{
-    vmathV4SetZ(result, _z);
-}
-
-static inline float vmathV4GetZ_V( VmathVector4 vec )
-{
-    return vmathV4GetZ(&vec);
-}
-
-static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
-{
-    vmathV4SetW(result, _w);
-}
-
-static inline float vmathV4GetW_V( VmathVector4 vec )
-{
-    return vmathV4GetW(&vec);
-}
-
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
-{
-    vmathV4SetElem(result, idx, value);
-}
-
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
-{
-    return vmathV4GetElem(&vec, idx);
-}
-
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MaxElem_V( VmathVector4 vec )
-{
-    return vmathV4MaxElem(&vec);
-}
-
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MinElem_V( VmathVector4 vec )
-{
-    return vmathV4MinElem(&vec);
-}
-
-static inline float vmathV4Sum_V( VmathVector4 vec )
-{
-    return vmathV4Sum(&vec);
-}
-
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    return vmathV4Dot(&vec0, &vec1);
-}
-
-static inline float vmathV4LengthSqr_V( VmathVector4 vec )
-{
-    return vmathV4LengthSqr(&vec);
-}
-
-static inline float vmathV4Length_V( VmathVector4 vec )
-{
-    return vmathV4Length(&vec);
-}
-
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
-{
-    VmathVector4 result;
-    vmathV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print_V( VmathVector4 vec )
-{
-    vmathV4Print(&vec);
-}
-
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
-{
-    vmathV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
-{
-    vmathP3SetX(result, _x);
-}
-
-static inline float vmathP3GetX_V( VmathPoint3 pnt )
-{
-    return vmathP3GetX(&pnt);
-}
-
-static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
-{
-    vmathP3SetY(result, _y);
-}
-
-static inline float vmathP3GetY_V( VmathPoint3 pnt )
-{
-    return vmathP3GetY(&pnt);
-}
-
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
-{
-    vmathP3SetZ(result, _z);
-}
-
-static inline float vmathP3GetZ_V( VmathPoint3 pnt )
-{
-    return vmathP3GetZ(&pnt);
-}
-
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
-{
-    vmathP3SetElem(result, idx, value);
-}
-
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
-{
-    return vmathP3GetElem(&pnt, idx);
-}
-
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathVector3 result;
-    vmathP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MaxElem(&pnt);
-}
-
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MinElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MinElem(&pnt);
-}
-
-static inline float vmathP3Sum_V( VmathPoint3 pnt )
-{
-    return vmathP3Sum(&pnt);
-}
-
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
-{
-    VmathPoint3 result;
-    vmathP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
-{
-    VmathPoint3 result;
-    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
-{
-    return vmathP3Projection(&pnt, &unitVec);
-}
-
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistSqrFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
-{
-    VmathPoint3 result;
-    vmathP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print_V( VmathPoint3 pnt )
-{
-    vmathP3Print(&pnt);
-}
-
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
-{
-    vmathP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_V_C_H
+#define _VECTORMATH_VEC_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
+{
+    VmathVector3 result;
+    vmathV3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
+{
+    VmathVector3 result;
+    vmathV3MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
+{
+    VmathVector3 result;
+    vmathV3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeXAxis_V( )
+{
+    VmathVector3 result;
+    vmathV3MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeYAxis_V( )
+{
+    VmathVector3 result;
+    vmathV3MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeZAxis_V( )
+{
+    VmathVector3 result;
+    vmathV3MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{
+    VmathVector3 result;
+    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
+{
+    vmathV3SetX(result, _x);
+}
+
+static inline float vmathV3GetX_V( VmathVector3 vec )
+{
+    return vmathV3GetX(&vec);
+}
+
+static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
+{
+    vmathV3SetY(result, _y);
+}
+
+static inline float vmathV3GetY_V( VmathVector3 vec )
+{
+    return vmathV3GetY(&vec);
+}
+
+static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
+{
+    vmathV3SetZ(result, _z);
+}
+
+static inline float vmathV3GetZ_V( VmathVector3 vec )
+{
+    return vmathV3GetZ(&vec);
+}
+
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
+{
+    vmathV3SetElem(result, idx, value);
+}
+
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
+{
+    return vmathV3GetElem(&vec, idx);
+}
+
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MaxElem_V( VmathVector3 vec )
+{
+    return vmathV3MaxElem(&vec);
+}
+
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MinElem_V( VmathVector3 vec )
+{
+    return vmathV3MinElem(&vec);
+}
+
+static inline float vmathV3Sum_V( VmathVector3 vec )
+{
+    return vmathV3Sum(&vec);
+}
+
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    return vmathV3Dot(&vec0, &vec1);
+}
+
+static inline float vmathV3LengthSqr_V( VmathVector3 vec )
+{
+    return vmathV3LengthSqr(&vec);
+}
+
+static inline float vmathV3Length_V( VmathVector3 vec )
+{
+    return vmathV3Length(&vec);
+}
+
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
+{
+    VmathVector3 result;
+    vmathV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print_V( VmathVector3 vec )
+{
+    vmathV3Print(&vec);
+}
+
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
+{
+    vmathV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
+{
+    VmathVector4 result;
+    vmathV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
+{
+    VmathVector4 result;
+    vmathV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeXAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeYAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeZAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeWAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeWAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
+{
+    VmathVector4 result;
+    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
+{
+    vmathV4SetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
+{
+    VmathVector3 result;
+    vmathV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
+{
+    vmathV4SetX(result, _x);
+}
+
+static inline float vmathV4GetX_V( VmathVector4 vec )
+{
+    return vmathV4GetX(&vec);
+}
+
+static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
+{
+    vmathV4SetY(result, _y);
+}
+
+static inline float vmathV4GetY_V( VmathVector4 vec )
+{
+    return vmathV4GetY(&vec);
+}
+
+static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
+{
+    vmathV4SetZ(result, _z);
+}
+
+static inline float vmathV4GetZ_V( VmathVector4 vec )
+{
+    return vmathV4GetZ(&vec);
+}
+
+static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
+{
+    vmathV4SetW(result, _w);
+}
+
+static inline float vmathV4GetW_V( VmathVector4 vec )
+{
+    return vmathV4GetW(&vec);
+}
+
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
+{
+    vmathV4SetElem(result, idx, value);
+}
+
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
+{
+    return vmathV4GetElem(&vec, idx);
+}
+
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MaxElem_V( VmathVector4 vec )
+{
+    return vmathV4MaxElem(&vec);
+}
+
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MinElem_V( VmathVector4 vec )
+{
+    return vmathV4MinElem(&vec);
+}
+
+static inline float vmathV4Sum_V( VmathVector4 vec )
+{
+    return vmathV4Sum(&vec);
+}
+
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    return vmathV4Dot(&vec0, &vec1);
+}
+
+static inline float vmathV4LengthSqr_V( VmathVector4 vec )
+{
+    return vmathV4LengthSqr(&vec);
+}
+
+static inline float vmathV4Length_V( VmathVector4 vec )
+{
+    return vmathV4Length(&vec);
+}
+
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
+{
+    VmathVector4 result;
+    vmathV4Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print_V( VmathVector4 vec )
+{
+    vmathV4Print(&vec);
+}
+
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
+{
+    vmathV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3Lerp(&result, t, &pnt0, &pnt1);
+    return result;
+}
+
+static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
+{
+    vmathP3SetX(result, _x);
+}
+
+static inline float vmathP3GetX_V( VmathPoint3 pnt )
+{
+    return vmathP3GetX(&pnt);
+}
+
+static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
+{
+    vmathP3SetY(result, _y);
+}
+
+static inline float vmathP3GetY_V( VmathPoint3 pnt )
+{
+    return vmathP3GetY(&pnt);
+}
+
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
+{
+    vmathP3SetZ(result, _z);
+}
+
+static inline float vmathP3GetZ_V( VmathPoint3 pnt )
+{
+    return vmathP3GetZ(&pnt);
+}
+
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
+{
+    vmathP3SetElem(result, idx, value);
+}
+
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
+{
+    return vmathP3GetElem(&pnt, idx);
+}
+
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathVector3 result;
+    vmathP3Sub(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3AddV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3SubV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MulPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3DivPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RecipPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3SqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RsqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3AbsPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MaxElem(&pnt);
+}
+
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MinPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MinElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MinElem(&pnt);
+}
+
+static inline float vmathP3Sum_V( VmathPoint3 pnt )
+{
+    return vmathP3Sum(&pnt);
+}
+
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
+{
+    VmathPoint3 result;
+    vmathP3Scale(&result, &pnt, scaleVal);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
+{
+    VmathPoint3 result;
+    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
+    return result;
+}
+
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
+{
+    return vmathP3Projection(&pnt, &unitVec);
+}
+
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistSqrFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
+{
+    VmathPoint3 result;
+    vmathP3Select(&result, &pnt0, &pnt1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print_V( VmathPoint3 pnt )
+{
+    vmathP3Print(&pnt);
+}
+
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
+{
+    vmathP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos.h
index 73dfc8a54..26bee0b81 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos.h
@@ -1,1879 +1,1879 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_SCALAR_H
-#define _VECTORMATH_AOS_C_SCALAR_H
-
-#include <math.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    float x;
-    float y;
-    float z;
-#ifndef __GNUC__
-    float d;
-#endif
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    float x;
-    float y;
-    float z;
-    float w;
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    float x;
-    float y;
-    float z;
-#ifndef __GNUC__
-    float d;
-#endif
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    float x;
-    float y;
-    float z;
-    float w;
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX( const VmathVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY( const VmathVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ( const VmathVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV3MakeXAxis( VmathVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV3MakeYAxis( VmathVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV3MakeZAxis( VmathVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem( const VmathVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem( const VmathVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum( const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr( const VmathVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length( const VmathVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- */
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print( const VmathVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX( const VmathVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY( const VmathVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ( const VmathVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW( const VmathVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV4MakeXAxis( VmathVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV4MakeYAxis( VmathVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV4MakeZAxis( VmathVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathV4MakeWAxis( VmathVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem( const VmathVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem( const VmathVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum( const VmathVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr( const VmathVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length( const VmathVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- */
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print( const VmathVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX( const VmathPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY( const VmathPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ( const VmathPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem( const VmathPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum( const VmathPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- */
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print( const VmathPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX( const VmathQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY( const VmathQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ( const VmathQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW( const VmathQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem( const VmathQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathQMakeIdentity( VmathQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm( const VmathQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength( const VmathQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- */
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint( const VmathQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints( const VmathQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant( const VmathMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- */
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print( const VmathMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant( const VmathMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- */
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print( const VmathMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathT3MakeIdentity( VmathTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- */
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print( const VmathTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_SCALAR_H
+#define _VECTORMATH_AOS_C_SCALAR_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format: three float elements x, y, z.
+ * With __GNUC__ the type is declared 16-byte aligned; otherwise a fourth float is appended. */
+typedef struct _VmathVector3
+{
+    float x;
+    float y;
+    float z;
+#ifndef __GNUC__
+    float d; /* present only without __GNUC__; presumably padding to 16 bytes -- TODO confirm */
+#endif
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GCC-style alignment request applied to the whole struct */
+#endif
+VmathVector3;
+
+/* A 4-D vector in array-of-structures format: four float elements x, y, z, w.
+ * With __GNUC__ the type is additionally declared 16-byte aligned. */
+typedef struct _VmathVector4
+{
+    float x;
+    float y;
+    float z;
+    float w;
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GCC-style alignment request applied to the whole struct */
+#endif
+VmathVector4;
+
+/* A 3-D point in array-of-structures format: three float elements x, y, z.
+ * Same member list and alignment handling as VmathVector3, but a distinct type. */
+typedef struct _VmathPoint3
+{
+    float x;
+    float y;
+    float z;
+#ifndef __GNUC__
+    float d; /* present only without __GNUC__; presumably padding to 16 bytes -- TODO confirm */
+#endif
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GCC-style alignment request applied to the whole struct */
+#endif
+VmathPoint3;
+
+/* A quaternion in array-of-structures format: four float elements x, y, z, w.
+ * With __GNUC__ the type is additionally declared 16-byte aligned. */
+typedef struct _VmathQuat
+{
+    float x;
+    float y;
+    float z;
+    float w; /* NOTE(review): presumably the scalar part -- confirm against the quaternion implementation */
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GCC-style alignment request applied to the whole struct */
+#endif
+VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format, stored as three column vectors.
+ * Each column is a VmathVector3 and therefore carries that type's alignment/padding. */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0; /* column 0 */
+    VmathVector3 col1; /* column 1 */
+    VmathVector3 col2; /* column 2 */
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format, stored as four column vectors.
+ * Each column is a VmathVector4 (x, y, z, w). */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0; /* column 0 */
+    VmathVector4 col1; /* column 1 */
+    VmathVector4 col2; /* column 2 */
+    VmathVector4 col3; /* column 3 */
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format, stored as four 3-D column vectors.
+ * Each column is a VmathVector3; no bottom row is stored. */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0; /* column 0 */
+    VmathVector3 col1; /* column 1 */
+    VmathVector3 col2; /* column 2 */
+    VmathVector3 col3; /* column 3; NOTE(review): presumably the translation -- confirm in the matrix implementation */
+} VmathTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX( const VmathVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY( const VmathVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ( const VmathVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV3MakeXAxis( VmathVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV3MakeYAxis( VmathVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV3MakeZAxis( VmathVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem( const VmathVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem( const VmathVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum( const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr( const VmathVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length( const VmathVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ */
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print( const VmathVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX( const VmathVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY( const VmathVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ( const VmathVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW( const VmathVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV4MakeXAxis( VmathVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV4MakeYAxis( VmathVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV4MakeZAxis( VmathVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathV4MakeWAxis( VmathVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem( const VmathVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem( const VmathVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum( const VmathVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr( const VmathVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length( const VmathVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ */
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print( const VmathVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX( const VmathPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY( const VmathPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ( const VmathPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem( const VmathPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum( const VmathPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ */
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print( const VmathPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX( const VmathQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY( const VmathQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ( const VmathQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW( const VmathQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem( const VmathQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathQMakeIdentity( VmathQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion
+ */
+static inline float vmathQNorm( const VmathQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength( const VmathQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ */
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint( const VmathQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints( const VmathQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant( const VmathMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print( const VmathMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 4x4 viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the transpose of a 4x4 matrix
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print( const VmathMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print( const VmathTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos_v.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos_v.h
index e827260fb..f766bb148 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos_v.h
@@ -1,1845 +1,1845 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_V_SCALAR_H
-#define _VECTORMATH_AOS_C_V_SCALAR_H
-
-#include <math.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    float x;
-    float y;
-    float z;
-#ifndef __GNUC__
-    float d;
-#endif
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    float x;
-    float y;
-    float z;
-    float w;
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    float x;
-    float y;
-    float z;
-#ifndef __GNUC__
-    float d;
-#endif
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    float x;
-    float y;
-    float z;
-    float w;
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX_V( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY_V( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX_V( VmathVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY_V( VmathVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ_V( VmathVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector3 vmathV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector3 vmathV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector3 vmathV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem_V( VmathVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem_V( VmathVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum_V( VmathVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr_V( VmathVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length_V( VmathVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- */
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print_V( VmathVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX_V( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY_V( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW_V( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX_V( VmathVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY_V( VmathVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ_V( VmathVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW_V( VmathVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector4 vmathV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector4 vmathV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector4 vmathV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathVector4 vmathV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem_V( VmathVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem_V( VmathVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum_V( VmathVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr_V( VmathVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length_V( VmathVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- */
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print_V( VmathVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX_V( VmathPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY_V( VmathPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ_V( VmathPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum_V( VmathPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- */
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print_V( VmathPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX_V( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY_V( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ_V( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW_V( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX_V( VmathQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY_V( VmathQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ_V( VmathQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW_V( VmathQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem_V( VmathQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathQuat vmathQNeg_V( VmathQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathQuat vmathQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathQuat vmathQMakeRotationX_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathQuat vmathQMakeRotationY_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathQuat vmathQMakeRotationZ_V( float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathQuat vmathQConj_V( VmathQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm_V( VmathQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength_V( VmathQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- */
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint_V( VmathQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints_V( VmathQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant_V( VmathMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print_V( VmathMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant_V( VmathMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print_V( VmathMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathTransform3 vmathT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- */
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print_V( VmathTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_aos.h"
-#include "vec_aos_v.h"
-#include "quat_aos_v.h"
-#include "mat_aos_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_V_SCALAR_H
+#define _VECTORMATH_AOS_C_V_SCALAR_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format: float members x, y, z.
+ * With __GNUC__ the type is 16-byte aligned; without it a fourth float d is added instead. */
+typedef struct _VmathVector3
+{
+    float x;
+    float y;
+    float z;
+#ifndef __GNUC__
+    float d; /* fourth float, present only when the aligned(16) attribute below is not applied */
+#endif
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+VmathVector3;
+
+/* A 4-D vector in array-of-structures format: float members x, y, z, w.
+ * With __GNUC__ the type is 16-byte aligned. */
+typedef struct _VmathVector4
+{
+    float x;
+    float y;
+    float z;
+    float w;
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+VmathVector4;
+
+/* A 3-D point in array-of-structures format: same member layout as VmathVector3,
+ * but a distinct type so that points and vectors cannot be mixed up by accident. */
+typedef struct _VmathPoint3
+{
+    float x;
+    float y;
+    float z;
+#ifndef __GNUC__
+    float d; /* fourth float, present only when the aligned(16) attribute below is not applied */
+#endif
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+VmathPoint3;
+
+/* A quaternion in array-of-structures format: float members x, y, z, w.
+ * With __GNUC__ the type is 16-byte aligned. */
+typedef struct _VmathQuat
+{
+    float x;
+    float y;
+    float z;
+    float w;
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format, stored as three VmathVector3 columns
+ */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format, stored as four VmathVector4 columns
+ */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format, stored as four VmathVector3 columns
+ */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3;
+} VmathTransform3;
+
+#endif /* _VECTORMATH_AOS_C_TYPES_H */
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Construct a 3-D vector by copying the elements of a 3-D point
+ */
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Construct a 3-D vector with all elements set to the same scalar value
+ */
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the x element of the 3-D vector pointed to by result
+ */
+static inline void vmathV3SetX_V( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of the 3-D vector pointed to by result
+ */
+static inline void vmathV3SetY_V( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of the 3-D vector pointed to by result
+ */
+static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX_V( VmathVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY_V( VmathVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ_V( VmathVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index (presumably idx 0, 1, 2 = x, y, z; confirm against the definition)
+ */
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index (presumably idx 0, 1, 2 = x, y, z; confirm against the definition)
+ */
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point; the result is a 3-D point
+ */
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
+
+/*
+ * Construct x axis as a 3-D vector (takes no arguments; `void` makes this a true prototype in C)
+ */
+static inline VmathVector3 vmathV3MakeXAxis_V( void );
+
+/*
+ * Construct y axis as a 3-D vector (takes no arguments; `void` makes this a true prototype in C)
+ */
+static inline VmathVector3 vmathV3MakeYAxis_V( void );
+
+/*
+ * Construct z axis as a 3-D vector (takes no arguments; `void` makes this a true prototype in C)
+ */
+static inline VmathVector3 vmathV3MakeZAxis_V( void );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE:
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE:
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE:
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE:
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element (NOTE(review): which argument supplies the sign is not stated here; check the definition)
+ */
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem_V( VmathVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem_V( VmathVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum_V( VmathVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr_V( VmathVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length_V( VmathVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE:
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors, returned as a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector, returned as a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE:
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE:
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE:
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors (presumably vec1 when select1 is non-zero, else vec0; confirm against the definition)
+ */
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print_V( VmathVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector (x, y, z) and a scalar (w)
+ */
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Construct a 4-D vector by copying the elements of a quaternion
+ */
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
+
+/*
+ * Construct a 4-D vector with all elements set to the same scalar value
+ */
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE:
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector as a 3-D vector
+ */
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
+
+/*
+ * Set the x element of the 4-D vector pointed to by result
+ */
+static inline void vmathV4SetX_V( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of the 4-D vector pointed to by result
+ */
+static inline void vmathV4SetY_V( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of the 4-D vector pointed to by result
+ */
+static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of the 4-D vector pointed to by result
+ */
+static inline void vmathV4SetW_V( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX_V( VmathVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY_V( VmathVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ_V( VmathVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW_V( VmathVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index (presumably idx 0..3 = x, y, z, w; confirm against the definition)
+ */
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index (presumably idx 0..3 = x, y, z, w; confirm against the definition)
+ */
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
+
+/*
+ * Construct x axis as a 4-D vector (takes no arguments; `void` makes this a true prototype in C)
+ */
+static inline VmathVector4 vmathV4MakeXAxis_V( void );
+
+/*
+ * Construct y axis as a 4-D vector (takes no arguments; `void` makes this a true prototype in C)
+ */
+static inline VmathVector4 vmathV4MakeYAxis_V( void );
+
+/*
+ * Construct z axis as a 4-D vector (takes no arguments; `void` makes this a true prototype in C)
+ */
+static inline VmathVector4 vmathV4MakeZAxis_V( void );
+
+/*
+ * Construct w axis as a 4-D vector (takes no arguments; `void` makes this a true prototype in C)
+ */
+static inline VmathVector4 vmathV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE:
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE:
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE:
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE:
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element (NOTE(review): which argument supplies the sign is not stated here; check the definition)
+ */
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem_V( VmathVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem_V( VmathVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum_V( VmathVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr_V( VmathVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length_V( VmathVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE:
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors, returned as a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE:
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE:
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors (presumably vec1 when select1 is non-zero, else vec0; confirm against the definition)
+ */
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print_V( VmathVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Construct a 3-D point by copying the elements of a 3-D vector
+ */
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Construct a 3-D point with all elements set to the same scalar value
+ */
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the x element of the 3-D point pointed to by result
+ */
+static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of the 3-D point pointed to by result
+ */
+static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of the 3-D point pointed to by result
+ */
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX_V( VmathPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY_V( VmathPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ_V( VmathPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index (presumably idx 0, 1, 2 = x, y, z; confirm against the definition)
+ */
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index (presumably idx 0, 1, 2 = x, y, z; confirm against the definition)
+ */
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point; the result is a 3-D vector
+ */
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point; the result is a 3-D point
+ */
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point; the result is a 3-D point
+ */
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE:
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE:
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE:
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE:
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element (NOTE(review): which argument supplies the sign is not stated here; check the definition)
+ */
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum_V( VmathPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point, with one scale factor per axis taken from scaleVec
+ */
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE:
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points (presumably pnt1 when select1 is non-zero, else pnt0; confirm against the definition)
+ */
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print_V( VmathPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector (x, y, z) and a scalar (w)
+ */
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Construct a quaternion by copying the elements of a 4-D vector
+ */
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
+
+/*
+ * Construct a quaternion with all elements set to the same scalar value
+ */
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE:
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion as a 3-D vector
+ */
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
+
+/*
+ * Set the x element of the quaternion pointed to by result
+ */
+static inline void vmathQSetX_V( VmathQuat *result, float x );
+
+/*
+ * Set the y element of the quaternion pointed to by result
+ */
+static inline void vmathQSetY_V( VmathQuat *result, float y );
+
+/*
+ * Set the z element of the quaternion pointed to by result
+ */
+static inline void vmathQSetZ_V( VmathQuat *result, float z );
+
+/*
+ * Set the w element of the quaternion pointed to by result
+ */
+static inline void vmathQSetW_V( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX_V( VmathQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY_V( VmathQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ_V( VmathQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW_V( VmathQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index (presumably idx 0..3 = x, y, z, w; confirm against the definition)
+ */
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index (presumably idx 0..3 = x, y, z, w; confirm against the definition)
+ */
+static inline float vmathQGetElem_V( VmathQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathQuat vmathQNeg_V( VmathQuat quat );
+
+/*
+ * Construct an identity quaternion (takes no arguments; `void` makes this a true prototype in C)
+ */
+static inline VmathQuat vmathQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE:
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector by an angle given in radians
+ */
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis by an angle given in radians
+ */
+static inline VmathQuat vmathQMakeRotationX_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis by an angle given in radians
+ */
+static inline VmathQuat vmathQMakeRotationY_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis by an angle given in radians
+ */
+static inline VmathQuat vmathQMakeRotationZ_V( float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathQuat vmathQConj_V( VmathQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion (NOTE(review): vmathQLength_V is declared separately; confirm whether "norm" here is the squared magnitude)
+ */
+static inline float vmathQNorm_V( VmathQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength_V( VmathQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE:
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE:
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE:
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation over four unit-length quaternions
+ */
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions (presumably quat1 when select1 is non-zero, else quat0; confirm against the definition)
+ */
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint_V( VmathQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints_V( VmathQuat quat, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (takes no arguments; explicit C prototype)
+ */
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant_V( VmathMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print_V( VmathMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix (takes no arguments; explicit C prototype)
+ */
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant_V( VmathMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print_V( VmathMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix (takes no arguments; explicit C prototype)
+ */
+static inline VmathTransform3 vmathT3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+ */
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ */
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print_V( VmathTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_aos.h"
+#include "vec_aos_v.h"
+#include "quat_aos_v.h"
+#include "mat_aos_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/mat_aos.h
index 0f2a85057..5d5d012d9 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/mat_aos.h
@@ -1,1643 +1,1643 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_CPP_H
-#define _VECTORMATH_MAT_AOS_CPP_H
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const Quat & unitQuat )
-{
-    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat.getX();
-    qy = unitQuat.getY();
-    qz = unitQuat.getZ();
-    qw = unitQuat.getW();
-    qx2 = ( qx + qx );
-    qy2 = ( qy + qy );
-    qz2 = ( qz + qz );
-    qxqx2 = ( qx * qx2 );
-    qxqy2 = ( qx * qy2 );
-    qxqz2 = ( qx * qz2 );
-    qxqw2 = ( qw * qx2 );
-    qyqy2 = ( qy * qy2 );
-    qyqz2 = ( qy * qz2 );
-    qyqw2 = ( qw * qy2 );
-    qzqz2 = ( qz * qz2 );
-    qzqw2 = ( qw * qz2 );
-    mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
-    mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
-    mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
-}
-
-inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, float val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    return Matrix3(
-        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
-        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
-        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    Vector3 tmp0, tmp1, tmp2;
-    float detinv;
-    tmp0 = cross( mat.getCol1(), mat.getCol2() );
-    tmp1 = cross( mat.getCol2(), mat.getCol0() );
-    tmp2 = cross( mat.getCol0(), mat.getCol1() );
-    detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) );
-    return Matrix3(
-        Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ),
-        Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ),
-        Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) )
-    );
-}
-
-inline float determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( float scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( 0.0f, c, s ),
-        Vector3( 0.0f, -s, c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3( c, 0.0f, -s ),
-        Vector3::yAxis( ),
-        Vector3( s, 0.0f, c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3( c, s, 0.0f ),
-        Vector3( -s, c, 0.0f ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Matrix3(
-        Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
-        Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
-        Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    return Matrix3(
-        Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ),
-        Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ),
-        Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
-{
-    return Matrix3(
-        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
-        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
-        Vector3( 0.0f, 0.0f, scaleVec.getZ() )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( float scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( mat.getCol3(), 1.0f );
-}
-
-inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, float val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    return Matrix4(
-        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
-        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
-        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
-        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    Vector4 res0, res1, res2, res3;
-    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
-    res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
-    res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
-    res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
-    detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) );
-    res1.setX( ( mI * tmp1 ) );
-    res1.setY( ( mM * tmp0 ) );
-    res1.setZ( ( mA * tmp1 ) );
-    res1.setW( ( mE * tmp0 ) );
-    res3.setX( ( mI * tmp3 ) );
-    res3.setY( ( mM * tmp2 ) );
-    res3.setZ( ( mA * tmp3 ) );
-    res3.setW( ( mE * tmp2 ) );
-    res2.setX( ( mI * tmp5 ) );
-    res2.setY( ( mM * tmp4 ) );
-    res2.setZ( ( mA * tmp5 ) );
-    res2.setW( ( mE * tmp4 ) );
-    tmp0 = ( ( mI * mB ) - ( mA * mJ ) );
-    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
-    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
-    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
-    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
-    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
-    res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) );
-    res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) );
-    res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) );
-    res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) );
-    res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) );
-    res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) );
-    res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) );
-    res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) );
-    res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) );
-    res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) );
-    res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) );
-    res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) );
-    return Matrix4(
-        ( res0 * detInv ),
-        ( res1 * detInv ),
-        ( res2 * detInv ),
-        ( res3 * detInv )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline float determinant( const Matrix4 & mat )
-{
-    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) );
-    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
-    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
-    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
-    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( float scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ) + ( mCol3.getX() * vec.getW() ) ),
-        ( ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ) + ( mCol3.getY() * vec.getW() ) ),
-        ( ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ( mCol3.getZ() * vec.getW() ) ),
-        ( ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ( mCol3.getW() * vec.getW() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
-{
-    return Vector4(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ),
-        ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
-{
-    return Vector4(
-        ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ),
-        ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ),
-        ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ),
-        ( ( ( ( mCol0.getW() * pnt.getX() ) + ( mCol1.getW() * pnt.getY() ) ) + ( mCol2.getW() * pnt.getZ() ) ) + mCol3.getW() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( 0.0f, c, s, 0.0f ),
-        Vector4( 0.0f, -s, c, 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4( c, 0.0f, -s, 0.0f ),
-        Vector4::yAxis( ),
-        Vector4( s, 0.0f, c, 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4( c, s, 0.0f, 0.0f ),
-        Vector4( -s, c, 0.0f, 0.0f ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Matrix4(
-        Vector4( ( cZ * cY ), ( sZ * cY ), -sY, 0.0f ),
-        Vector4( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ), 0.0f ),
-        Vector4( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    return Matrix4(
-        Vector4( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ), 0.0f ),
-        Vector4( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ), 0.0f ),
-        Vector4( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
-{
-    return Matrix4(
-        Vector4( scaleVec.getX(), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, scaleVec.getY(), 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, scaleVec.getZ(), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, 1.0f );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, 1.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    f = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
-    rangeInv = ( 1.0f / ( zNear - zFar ) );
-    return Matrix4(
-        Vector4( ( f / aspect ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, f, 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f ),
-        Vector4( 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    n2 = ( zNear + zNear );
-    return Matrix4(
-        Vector4( ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f ),
-        Vector4( ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f ),
-        Vector4( 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    return Matrix4(
-        Vector4( ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f ),
-        Vector4( ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-    print( mat.getRow( 3 ) );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, float val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
-    float detinv;
-    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );
-    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
-    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
-    detinv = ( 1.0f / dot( tfrm.getCol2(), tmp2 ) );
-    inv0 = Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) );
-    inv1 = Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) );
-    inv2 = Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    Vector3 inv0, inv1, inv2;
-    inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );
-    inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );
-    inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
-    );
-}
-
-inline const Point3 Transform3::operator *( const Point3 & pnt ) const
-{
-    return Point3(
-        ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ),
-        ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ),
-        ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() )
-    );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( 0.0f, c, s ),
-        Vector3( 0.0f, -s, c ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3( c, 0.0f, -s ),
-        Vector3::yAxis( ),
-        Vector3( s, 0.0f, c ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3( c, s, 0.0f ),
-        Vector3( -s, c, 0.0f ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Transform3(
-        Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
-        Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
-        Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::rotation( const Quat & unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
-{
-    return Transform3(
-        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
-        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
-        Vector3( 0.0f, 0.0f, scaleVec.getZ() ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( const Vector3 & translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    print( tfrm.getRow( 0 ) );
-    print( tfrm.getRow( 1 ) );
-    print( tfrm.getRow( 2 ) );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    int negTrace, ZgtX, ZgtY, YgtX;
-    int largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm.getCol0().getX();
-    yx = tfrm.getCol0().getY();
-    zx = tfrm.getCol0().getZ();
-    xy = tfrm.getCol1().getX();
-    yy = tfrm.getCol1().getY();
-    zy = tfrm.getCol1().getZ();
-    xz = tfrm.getCol2().getX();
-    yz = tfrm.getCol2().getY();
-    zz = tfrm.getCol2().getZ();
-
-    trace = ( ( xx + yy ) + zz );
-
-    negTrace = ( trace < 0.0f );
-    ZgtX = zz > xx;
-    ZgtY = zz > yy;
-    YgtX = yy > xx;
-    largestXorY = ( !ZgtX || !ZgtY ) && negTrace;
-    largestYorZ = ( YgtX || ZgtX ) && negTrace;
-    largestZorX = ( ZgtY || !YgtX ) && negTrace;
-    
-    if ( largestXorY )
-    {
-        zz = -zz;
-        xy = -xy;
-    }
-    if ( largestYorZ )
-    {
-        xx = -xx;
-        yz = -yz;
-    }
-    if ( largestZorX )
-    {
-        yy = -yy;
-        zx = -zx;
-    }
-
-    radicand = ( ( ( xx + yy ) + zz ) + 1.0f );
-    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
-
-    tmpx = ( ( zy - yz ) * scale );
-    tmpy = ( ( xz - zx ) * scale );
-    tmpz = ( ( yx - xy ) * scale );
-    tmpw = ( radicand * scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    if ( largestXorY )
-    {
-        qx = tmpw;
-        qy = tmpz;
-        qz = tmpy;
-        qw = tmpx;
-    }
-    if ( largestYorZ )
-    {
-        tmpx = qx;
-        tmpz = qz;
-        qx = qy;
-        qy = tmpx;
-        qz = qw;
-        qw = tmpz;
-    }
-
-    mX = qx;
-    mY = qy;
-    mZ = qz;
-    mW = qw;
-}
-
-inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Vector3(
-        ( ( ( vec.getX() * mat.getCol0().getX() ) + ( vec.getY() * mat.getCol0().getY() ) ) + ( vec.getZ() * mat.getCol0().getZ() ) ),
-        ( ( ( vec.getX() * mat.getCol1().getX() ) + ( vec.getY() * mat.getCol1().getY() ) ) + ( vec.getZ() * mat.getCol1().getZ() ) ),
-        ( ( ( vec.getX() * mat.getCol2().getX() ) + ( vec.getY() * mat.getCol2().getY() ) ) + ( vec.getZ() * mat.getCol2().getZ() ) )
-    );
-}
-
-inline const Matrix3 crossMatrix( const Vector3 & vec )
-{
-    return Matrix3(
-        Vector3( 0.0f, vec.getZ(), -vec.getY() ),
-        Vector3( -vec.getZ(), 0.0f, vec.getX() ),
-        Vector3( vec.getY(), -vec.getX(), 0.0f )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( float scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( const Quat & unitQuat )
+{
+    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat.getX();
+    qy = unitQuat.getY();
+    qz = unitQuat.getZ();
+    qw = unitQuat.getW();
+    qx2 = ( qx + qx );
+    qy2 = ( qy + qy );
+    qz2 = ( qz + qz );
+    qxqx2 = ( qx * qx2 );
+    qxqy2 = ( qx * qy2 );
+    qxqz2 = ( qx * qz2 );
+    qxqw2 = ( qw * qx2 );
+    qyqy2 = ( qy * qy2 );
+    qyqz2 = ( qy * qz2 );
+    qyqw2 = ( qw * qy2 );
+    qzqz2 = ( qz * qz2 );
+    qzqw2 = ( qw * qz2 );
+    mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
+    mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
+    mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
+}
+
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+}
+
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
+{
+    *(&mCol0 + col) = vec;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )
+{
+    Vector3 tmpV3_0;
+    tmpV3_0 = this->getCol( col );
+    tmpV3_0.setElem( row, val );
+    this->setCol( col, tmpV3_0 );
+    return *this;
+}
+
+inline float Matrix3::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col )
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    return Matrix3(
+        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
+        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
+        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
+    );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat )
+{
+    Vector3 tmp0, tmp1, tmp2;
+    float detinv;
+    tmp0 = cross( mat.getCol1(), mat.getCol2() );
+    tmp1 = cross( mat.getCol2(), mat.getCol0() );
+    tmp2 = cross( mat.getCol0(), mat.getCol1() );
+    detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) );
+    return Matrix3(
+        Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ),
+        Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ),
+        Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) )
+    );
+}
+
+inline float determinant( const Matrix3 & mat )
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 )
+    );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    *this = *this + mat;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    *this = *this - mat;
+    return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    return Matrix3(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 )
+    );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    return Matrix3(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{
+    return Matrix3(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+    return mat * scalar;
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
+{
+    return Vector3(
+        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
+        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
+        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( *this * mat.mCol0 ),
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    *this = *this * mat;
+    return *this;
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    return Matrix3(
+        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
+        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
+        mulPerElem( mat0.getCol2(), mat1.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::identity( )
+{
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationX( float radians )
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( 0.0f, c, s ),
+        Vector3( 0.0f, -s, c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationY( float radians )
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    return Matrix3(
+        Vector3( c, 0.0f, -s ),
+        Vector3::yAxis( ),
+        Vector3( s, 0.0f, c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    return Matrix3(
+        Vector3( c, s, 0.0f ),
+        Vector3( -s, c, 0.0f ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sX = sinf( radiansXYZ.getX() );
+    cX = cosf( radiansXYZ.getX() );
+    sY = sinf( radiansXYZ.getY() );
+    cY = cosf( radiansXYZ.getY() );
+    sZ = sinf( radiansXYZ.getZ() );
+    cZ = cosf( radiansXYZ.getZ() );
+    tmp0 = ( cZ * sY );
+    tmp1 = ( sZ * sY );
+    return Matrix3(
+        Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
+        Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
+        Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec )
+{
+    float x, y, z, s, c, oneMinusC, xy, yz, zx;
+    s = sinf( radians );
+    c = cosf( radians );
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    xy = ( x * y );
+    yz = ( y * z );
+    zx = ( z * x );
+    oneMinusC = ( 1.0f - c );
+    return Matrix3(
+        Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ),
+        Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ),
+        Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
+{
+    return Matrix3( unitQuat );
+}
+
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
+{
+    return Matrix3(
+        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
+        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
+        Vector3( 0.0f, 0.0f, scaleVec.getZ() )
+    );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
+{
+    return Matrix3(
+        ( mat.getCol0() * scaleVec.getX( ) ),
+        ( mat.getCol1() * scaleVec.getY( ) ),
+        ( mat.getCol2() * scaleVec.getZ( ) )
+    );
+}
+
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
+{
+    return Matrix3(
+        mulPerElem( mat.getCol0(), scaleVec ),
+        mulPerElem( mat.getCol1(), scaleVec ),
+        mulPerElem( mat.getCol2(), scaleVec )
+    );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    return Matrix3(
+        select( mat0.getCol0(), mat1.getCol0(), select1 ),
+        select( mat0.getCol1(), mat1.getCol1(), select1 ),
+        select( mat0.getCol2(), mat1.getCol2(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat )
+{
+    print( mat.getRow( 0 ) );
+    print( mat.getRow( 1 ) );
+    print( mat.getRow( 2 ) );
+}
+
+inline void print( const Matrix3 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+}
+
+inline Matrix4::Matrix4( float scalar )
+{
+    mCol0 = Vector4( scalar );
+    mCol1 = Vector4( scalar );
+    mCol2 = Vector4( scalar );
+    mCol3 = Vector4( scalar );
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat )
+{
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( mat.getCol3(), 1.0f );
+}
+
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+    mCol3 = _col3;
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
+{
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    Matrix3 mat;
+    mat = Matrix3( unitQuat );
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
+{
+    mCol3 = _col3;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
+{
+    *(&mCol0 + col) = vec;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    Vector4 tmpV3_0;
+    tmpV3_0 = this->getCol( col );
+    tmpV3_0.setElem( row, val );
+    this->setCol( col, tmpV3_0 );
+    return *this;
+}
+
+inline float Matrix4::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return mCol0;
+}
+
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return mCol1;
+}
+
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return mCol2;
+}
+
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return mCol3;
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+inline Vector4 & Matrix4::operator []( int col )
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+    return *this;
+}
+
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    return Matrix4(
+        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
+        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
+        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
+        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
+    );
+}
+
+inline const Matrix4 inverse( const Matrix4 & mat )
+{
+    Vector4 res0, res1, res2, res3;
+    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat.getCol0().getX();
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
+    res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
+    res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
+    res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
+    detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) );
+    res1.setX( ( mI * tmp1 ) );
+    res1.setY( ( mM * tmp0 ) );
+    res1.setZ( ( mA * tmp1 ) );
+    res1.setW( ( mE * tmp0 ) );
+    res3.setX( ( mI * tmp3 ) );
+    res3.setY( ( mM * tmp2 ) );
+    res3.setZ( ( mA * tmp3 ) );
+    res3.setW( ( mE * tmp2 ) );
+    res2.setX( ( mI * tmp5 ) );
+    res2.setY( ( mM * tmp4 ) );
+    res2.setZ( ( mA * tmp5 ) );
+    res2.setW( ( mE * tmp4 ) );
+    tmp0 = ( ( mI * mB ) - ( mA * mJ ) );
+    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
+    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
+    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
+    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
+    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
+    res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) );
+    res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) );
+    res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) );
+    res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) );
+    res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) );
+    res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) );
+    res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) );
+    res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) );
+    res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) );
+    res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) );
+    res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) );
+    res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) );
+    return Matrix4(
+        ( res0 * detInv ),
+        ( res1 * detInv ),
+        ( res2 * detInv ),
+        ( res3 * detInv )
+    );
+}
+
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    Transform3 affineMat;
+    affineMat.setCol0( mat.getCol0().getXYZ( ) );
+    affineMat.setCol1( mat.getCol1().getXYZ( ) );
+    affineMat.setCol2( mat.getCol2().getXYZ( ) );
+    affineMat.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( inverse( affineMat ) );
+}
+
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    Transform3 affineMat;
+    affineMat.setCol0( mat.getCol0().getXYZ( ) );
+    affineMat.setCol1( mat.getCol1().getXYZ( ) );
+    affineMat.setCol2( mat.getCol2().getXYZ( ) );
+    affineMat.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( orthoInverse( affineMat ) );
+}
+
+inline float determinant( const Matrix4 & mat )
+{
+    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat.getCol0().getX();
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) );
+    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
+    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
+    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
+    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
+}
+
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    return Matrix4(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 ),
+        ( mCol3 + mat.mCol3 )
+    );
+}
+
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    return Matrix4(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 ),
+        ( mCol3 - mat.mCol3 )
+    );
+}
+
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    *this = *this + mat;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    *this = *this - mat;
+    return *this;
+}
+
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    return Matrix4(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 ),
+        ( -mCol3 )
+    );
+}
+
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    return Matrix4(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() ),
+        absPerElem( mat.getCol3() )
+    );
+}
+
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    return Matrix4(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar ),
+        ( mCol3 * scalar )
+    );
+}
+
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    return mat * scalar;
+}
+
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
+{
+    const float vx = vec.getX(), vy = vec.getY(), vz = vec.getZ(), vw = vec.getW();
+    const float rx = ( ( ( ( mCol0.getX() * vx ) + ( mCol1.getX() * vy ) ) + ( mCol2.getX() * vz ) ) + ( mCol3.getX() * vw ) );
+    const float ry = ( ( ( ( mCol0.getY() * vx ) + ( mCol1.getY() * vy ) ) + ( mCol2.getY() * vz ) ) + ( mCol3.getY() * vw ) );
+    const float rz = ( ( ( ( mCol0.getZ() * vx ) + ( mCol1.getZ() * vy ) ) + ( mCol2.getZ() * vz ) ) + ( mCol3.getZ() * vw ) );
+    const float rw = ( ( ( ( mCol0.getW() * vx ) + ( mCol1.getW() * vy ) ) + ( mCol2.getW() * vz ) ) + ( mCol3.getW() * vw ) );
+    return Vector4( rx, ry, rz, rw );  // each component is one matrix row dotted with vec
+}
+
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
+{
+    const float vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();
+    const float rx = ( ( ( mCol0.getX() * vx ) + ( mCol1.getX() * vy ) ) + ( mCol2.getX() * vz ) );
+    const float ry = ( ( ( mCol0.getY() * vx ) + ( mCol1.getY() * vy ) ) + ( mCol2.getY() * vz ) );
+    const float rz = ( ( ( mCol0.getZ() * vx ) + ( mCol1.getZ() * vy ) ) + ( mCol2.getZ() * vz ) );
+    const float rw = ( ( ( mCol0.getW() * vx ) + ( mCol1.getW() * vy ) ) + ( mCol2.getW() * vz ) );
+    return Vector4( rx, ry, rz, rw );  // mCol3 does not contribute: the vector is treated as having w == 0
+}
+
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
+{
+    const float px = pnt.getX(), py = pnt.getY(), pz = pnt.getZ();
+    const float rx = ( ( ( ( mCol0.getX() * px ) + ( mCol1.getX() * py ) ) + ( mCol2.getX() * pz ) ) + mCol3.getX() );
+    const float ry = ( ( ( ( mCol0.getY() * px ) + ( mCol1.getY() * py ) ) + ( mCol2.getY() * pz ) ) + mCol3.getY() );
+    const float rz = ( ( ( ( mCol0.getZ() * px ) + ( mCol1.getZ() * py ) ) + ( mCol2.getZ() * pz ) ) + mCol3.getZ() );
+    const float rw = ( ( ( ( mCol0.getW() * px ) + ( mCol1.getW() * py ) ) + ( mCol2.getW() * pz ) ) + mCol3.getW() );
+    return Vector4( rx, ry, rz, rw );  // mCol3 is added unscaled: the point is treated as having w == 1
+}
+
+// Matrix product: each result column is *this applied to the matching column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 prod0 = ( *this * mat.mCol0 );
+    const Vector4 prod1 = ( *this * mat.mCol1 );
+    const Vector4 prod2 = ( *this * mat.mCol2 );
+    const Vector4 prod3 = ( *this * mat.mCol3 );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    this->operator =( this->operator *( mat ) );  // right-multiply by mat, then store back into this matrix
+    return *this;
+}
+
+// Product with a 3x4 transform: its first three columns go through the Vector3 overload, its fourth through the Point3 overload.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 prod0 = ( *this * tfrm.getCol0() );
+    const Vector4 prod1 = ( *this * tfrm.getCol1() );
+    const Vector4 prod2 = ( *this * tfrm.getCol2() );
+    const Vector4 prod3 = ( *this * Point3( tfrm.getCol3() ) );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    this->operator =( this->operator *( tfrm ) );  // right-multiply by tfrm, then store back into this matrix
+    return *this;
+}
+
+// Element-wise product of two matrices, delegated column by column to the Vector4 mulPerElem.
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Identity matrix, assembled from the four Vector4 axis constants.
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 unitX = Vector4::xAxis( );
+    const Vector4 unitY = Vector4::yAxis( );
+    const Vector4 unitZ = Vector4::zAxis( );
+    const Vector4 unitW = Vector4::wAxis( );
+    return Matrix4( unitX, unitY, unitZ, unitW );
+}
+
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )  // overwrites the 3x3 block; returns *this
+{
+    mCol0.setXYZ( mat3.getCol0() );  // only x, y, z are passed in; presumably w is left as is - confirm against Vector4::setXYZ
+    mCol1.setXYZ( mat3.getCol1() );
+    mCol2.setXYZ( mat3.getCol2() );
+    return *this;  // mCol3 is not touched by this function
+}
+
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    // Take the xyz part of the first three columns; mCol3 is not used.
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )  // returns *this
+{
+    mCol3.setXYZ( translateVec );  // the translation lives in the xyz part of the fourth column
+    return *this;
+}
+
+inline const Vector3 Matrix4::getTranslation( ) const  // counterpart of setTranslation
+{
+    return mCol3.getXYZ( );  // xyz part of the fourth column
+}
+
+// 4x4 rotation about the x axis; the angle goes straight to sinf/cosf, so it is in radians.
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    // Columns 0 and 3 are the plain x and w axes; only columns 1 and 2 depend on the angle.
+    const Vector4 col0 = Vector4::xAxis( );
+    const Vector4 col1( 0.0f, cosA, sinA, 0.0f );
+    const Vector4 col2( 0.0f, -sinA, cosA, 0.0f );
+    const Vector4 col3 = Vector4::wAxis( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// 4x4 rotation about the y axis; the angle goes straight to sinf/cosf, so it is in radians.
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    // Columns 1 and 3 are the plain y and w axes; only columns 0 and 2 depend on the angle.
+    const Vector4 col0( cosA, 0.0f, -sinA, 0.0f );
+    const Vector4 col1 = Vector4::yAxis( );
+    const Vector4 col2( sinA, 0.0f, cosA, 0.0f );
+    const Vector4 col3 = Vector4::wAxis( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// 4x4 rotation about the z axis; the angle goes straight to sinf/cosf, so it is in radians.
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    // Columns 2 and 3 are the plain z and w axes; only columns 0 and 1 depend on the angle.
+    const Vector4 col0( cosA, sinA, 0.0f, 0.0f );
+    const Vector4 col1( -sinA, cosA, 0.0f, 0.0f );
+    const Vector4 col2 = Vector4::zAxis( );
+    const Vector4 col3 = Vector4::wAxis( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Combined rotation from three angles: the x, y, z components of radiansXYZ feed sinf/cosf.
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    // Products shared by the second and third columns.
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    // The rotation occupies the xyz part of columns 0-2; every w entry there is 0.
+    const Vector4 col0( ( cosZ * cosY ), ( sinZ * cosY ), -sinY, 0.0f );
+    const Vector4 col1( ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ), 0.0f );
+    const Vector4 col2( ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ), 0.0f );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+// Rotation by `radians` about the axis unitVec; the axis is used as given and is not normalized here.
+inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const float ax = unitVec.getX();
+    const float ay = unitVec.getY();
+    const float az = unitVec.getZ();
+    const float axay = ( ax * ay );
+    const float ayaz = ( ay * az );
+    const float azax = ( az * ax );
+    const float versine = ( 1.0f - cosA );
+    // Axis-angle form: diagonal a*a*(1-cos)+cos, off-diagonals a*b*(1-cos) +/- c*sin.
+    const Vector4 col0( ( ( ( ax * ax ) * versine ) + cosA ), ( ( axay * versine ) + ( az * sinA ) ), ( ( azax * versine ) - ( ay * sinA ) ), 0.0f );
+    const Vector4 col1( ( ( axay * versine ) - ( az * sinA ) ), ( ( ( ay * ay ) * versine ) + cosA ), ( ( ayaz * versine ) + ( ax * sinA ) ), 0.0f );
+    const Vector4 col2( ( ( azax * versine ) + ( ay * sinA ) ), ( ( ayaz * versine ) - ( ax * sinA ) ), ( ( ( az * az ) * versine ) + cosA ), 0.0f );
+    const Vector4 col3 = Vector4::wAxis( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )  // rotation matrix from a quaternion
+{
+    return Matrix4( Transform3::rotation( unitQuat ) );  // built as a Transform3 first, then converted to a Matrix4
+}
+
+// Scaling matrix: scaleVec supplies the first three diagonal entries; the fourth column is Vector4::wAxis().
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    const Vector4 col0( scaleVec.getX(), 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, scaleVec.getY(), 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, scaleVec.getZ(), 0.0f );
+    const Vector4 col3 = Vector4::wAxis( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Scales the columns of mat: column 0 by x, column 1 by y, column 2 by z; column 3 is passed through.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    const Vector4 scaled0 = ( mat.getCol0() * scaleVec.getX( ) );
+    const Vector4 scaled1 = ( mat.getCol1() * scaleVec.getY( ) );
+    const Vector4 scaled2 = ( mat.getCol2() * scaleVec.getZ( ) );
+    const Vector4 col3 = mat.getCol3();
+    return Matrix4( scaled0, scaled1, scaled2, col3 );
+}
+
+// Scales the rows of mat: every column is multiplied element-wise by ( scaleVec, 1 ),
+// so the x, y, z rows are scaled and the w row is multiplied by 1.0f.
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    const Vector4 rowScale = Vector4( scaleVec, 1.0f );
+    const Vector4 scaled0 = mulPerElem( mat.getCol0(), rowScale );
+    const Vector4 scaled1 = mulPerElem( mat.getCol1(), rowScale );
+    const Vector4 scaled2 = mulPerElem( mat.getCol2(), rowScale );
+    const Vector4 scaled3 = mulPerElem( mat.getCol3(), rowScale );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Translation matrix: the x, y, z axis constants as columns 0-2 and ( translateVec, 1 ) as column 3.
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    const Vector4 unitX = Vector4::xAxis( );
+    const Vector4 unitY = Vector4::yAxis( );
+    const Vector4 unitZ = Vector4::zAxis( );
+    const Vector4 offset( translateVec, 1.0f );
+    return Matrix4( unitX, unitY, unitZ, offset );
+}
+
+// Builds the eye's frame (three axes plus eyePos) and returns its orthoInverse.
+// The z axis points from lookAtPos toward eyePos; the y axis is recomputed so the three axes are orthogonal.
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    const Vector3 upDir = normalize( upVec );
+    const Vector3 axisZ = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 axisX = normalize( cross( upDir, axisZ ) );
+    const Vector3 axisY = cross( axisZ, axisX );
+    const Matrix4 eyeFrame = Matrix4( Vector4( axisX ), Vector4( axisY ), Vector4( axisZ ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+// Perspective projection from a vertical field of view; the entries coincide with the gluPerspective matrix.
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    // tan( pi/2 - fovy/2 ) is the cotangent of half the field of view (given _VECTORMATH_PI_OVER_2 is pi/2).
+    const float cotHalfFovy = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
+    const float rangeInv = ( 1.0f / ( zNear - zFar ) );
+    const Vector4 col0( ( cotHalfFovy / aspect ), 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, cotHalfFovy, 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f );
+    const Vector4 col3( 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Projection matrix from frustum extents; the entries coincide with those of the OpenGL glFrustum matrix.
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float sumRL = ( right + left );
+    const float sumTB = ( top + bottom );
+    const float sumNF = ( zNear + zFar );
+    const float invRL = ( 1.0f / ( right - left ) );
+    const float invTB = ( 1.0f / ( top - bottom ) );
+    const float invNF = ( 1.0f / ( zNear - zFar ) );
+    const float twoNear = ( zNear + zNear );
+    // The reciprocals above are unguarded: right == left, top == bottom or zNear == zFar divide by zero.
+    const Vector4 col0( ( twoNear * invRL ), 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, ( twoNear * invTB ), 0.0f, 0.0f );
+    const Vector4 col2( ( sumRL * invRL ), ( sumTB * invTB ), ( sumNF * invNF ), -1.0f );
+    const Vector4 col3( 0.0f, 0.0f, ( ( twoNear * invNF ) * zFar ), 0.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Orthographic projection matrix; the entries coincide with those of the OpenGL glOrtho matrix.
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float sumRL = ( right + left );
+    const float sumTB = ( top + bottom );
+    const float sumNF = ( zNear + zFar );
+    const float invRL = ( 1.0f / ( right - left ) );
+    const float invTB = ( 1.0f / ( top - bottom ) );
+    const float invNF = ( 1.0f / ( zNear - zFar ) );
+    // The reciprocals above are unguarded: right == left, top == bottom or zNear == zFar divide by zero.
+    const Vector4 col0( ( invRL + invRL ), 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, ( invTB + invTB ), 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, ( invNF + invNF ), 0.0f );
+    const Vector4 col3( ( -sumRL * invRL ), ( -sumTB * invTB ), ( sumNF * invNF ), 1.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Chooses between mat0 and mat1 by forwarding select1 to the Vector4 select for each column.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 picked3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix4 & mat )
+{
+    // Emit the matrix one row at a time, rows 0 through 3.
+    for ( int row = 0; row < 4; ++row ) {
+        print( mat.getRow( row ) );
+    }
+}
+
+inline void print( const Matrix4 & mat, const char * name )  // labelled variant of print( mat )
+{
+    printf("%s:\n", name);  // label line first; name is formatted with %s, so it must be a valid C string
+    print( mat );  // then the four rows
+}
+
+#endif
+
+inline Transform3::Transform3( const Transform3 & tfrm )  // copy constructor: column-by-column assignment
+{
+    mCol0 = tfrm.mCol0;  // columns 0-2 hold the 3x3 part (see setUpper3x3)
+    mCol1 = tfrm.mCol1;
+    mCol2 = tfrm.mCol2;
+    mCol3 = tfrm.mCol3;  // column 3 holds the translation (see setTranslation)
+}
+
+inline Transform3::Transform3( float scalar )  // every column, translation included, is built from the same scalar
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+    mCol3 = Vector3( scalar );
+}
+
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )  // from four explicit columns
+{
+    mCol0 = _col0;  // columns 0-2: the 3x3 part
+    mCol1 = _col1;
+    mCol2 = _col2;
+    mCol3 = _col3;  // column 3: the translation
+}
+
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    setUpper3x3( tfrm );             // columns 0-2 come from the 3x3 matrix
+    setTranslation( translateVec );  // column 3 comes from the translation vector
+}
+
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    setUpper3x3( Matrix3( unitQuat ) );  // columns 0-2 come from the quaternion converted to a Matrix3
+    setTranslation( translateVec );      // column 3 comes from the translation vector
+}
+
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )  // replaces column 0
+{
+    mCol0 = _col0;
+    return *this;  // returning the object lets setter calls be chained
+}
+
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )  // replaces column 1
+{
+    mCol1 = _col1;
+    return *this;  // returning the object lets setter calls be chained
+}
+
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )  // replaces column 2
+{
+    mCol2 = _col2;
+    return *this;  // returning the object lets setter calls be chained
+}
+
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )  // replaces column 3 (the translation column)
+{
+    mCol3 = _col3;
+    return *this;  // returning the object lets setter calls be chained
+}
+
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{
+    ( &mCol0 )[ col ] = vec;  // indexes from mCol0, so it relies on mCol0..mCol3 being adjacent; col is not range-checked
+    return *this;
+}
+
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )  // a row spans all four columns, hence the Vector4
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );  // element i of vec goes into column i at index `row`
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;  // row is forwarded to Vector3::setElem unchecked here
+}
+
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    // Read-modify-write of one column: copy it out, patch one element, store it back.
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+
+inline float Transform3::getElem( int col, int row ) const  // note the argument order: column first, then row
+{
+    return this->getCol( col ).getElem( row );  // fetches a copy of the column, then reads one element of it
+}
+
+inline const Vector3 Transform3::getCol0( ) const  // column 0, returned by value
+{
+    return mCol0;
+}
+
+inline const Vector3 Transform3::getCol1( ) const  // column 1, returned by value
+{
+    return mCol1;
+}
+
+inline const Vector3 Transform3::getCol2( ) const  // column 2, returned by value
+{
+    return mCol2;
+}
+
+inline const Vector3 Transform3::getCol3( ) const  // column 3 (the translation column), returned by value
+{
+    return mCol3;
+}
+
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    return ( &mCol0 )[ col ];  // indexes from mCol0, so it relies on mCol0..mCol3 being adjacent; col is not range-checked
+}
+
+inline const Vector4 Transform3::getRow( int row ) const  // a row spans all four columns, hence the Vector4 result
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );  // element `row` of each column, in column order
+}
+
+inline Vector3 & Transform3::operator []( int col )
+{
+    return ( &mCol0 )[ col ];  // mutable reference to column `col`; relies on adjacent columns, no range check
+}
+
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    return ( &mCol0 )[ col ];  // copy of column `col`; relies on adjacent columns, no range check
+}
+
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )  // copy assignment, column by column
+{
+    mCol0 = tfrm.mCol0;  // no self-assignment guard: assigning an object to itself just rewrites the same values
+    mCol1 = tfrm.mCol1;
+    mCol2 = tfrm.mCol2;
+    mCol3 = tfrm.mCol3;
+    return *this;  // reference to the assigned-to object, as usual for operator =
+}
+
+// General inverse of a 3x4 transform: the 3x3 part is inverted through cross products
+// scaled by the reciprocal determinant, then the translation is mapped through it and negated.
+// There is no check for a singular 3x3 part (zero determinant).
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    // Cross products of the column pairs; their components become the rows of the inverse.
+    const Vector3 cross12 = cross( tfrm.getCol1(), tfrm.getCol2() );
+    const Vector3 cross20 = cross( tfrm.getCol2(), tfrm.getCol0() );
+    const Vector3 cross01 = cross( tfrm.getCol0(), tfrm.getCol1() );
+    const float invDet = ( 1.0f / dot( tfrm.getCol2(), cross01 ) );
+    const Vector3 inv0 = Vector3( ( cross12.getX() * invDet ), ( cross20.getX() * invDet ), ( cross01.getX() * invDet ) );
+    const Vector3 inv1 = Vector3( ( cross12.getY() * invDet ), ( cross20.getY() * invDet ), ( cross01.getY() * invDet ) );
+    const Vector3 inv2 = Vector3( ( cross12.getZ() * invDet ), ( cross20.getZ() * invDet ), ( cross01.getZ() * invDet ) );
+    // Inverse translation: -( inverse3x3 * col3 ), expanded as a sum of scaled columns.
+    const Vector3 trans = tfrm.getCol3();
+    const Vector3 invTrans = Vector3( ( -( ( inv0 * trans.getX() ) + ( ( inv1 * trans.getY() ) + ( inv2 * trans.getZ() ) ) ) ) );
+    return Transform3( inv0, inv1, inv2, invTrans );
+}
+
+// Inverse computed by transposing the 3x3 part and mapping the negated translation through
+// that transpose. This equals the true inverse only when the 3x3 part is orthonormal,
+// which is assumed here and not checked.
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    const Vector3 c0 = tfrm.getCol0(), c1 = tfrm.getCol1(), c2 = tfrm.getCol2(), trans = tfrm.getCol3();
+    const Vector3 row0 = Vector3( c0.getX(), c1.getX(), c2.getX() );
+    const Vector3 row1 = Vector3( c0.getY(), c1.getY(), c2.getY() );
+    const Vector3 row2 = Vector3( c0.getZ(), c1.getZ(), c2.getZ() );
+    // Inverse translation: -( transpose * col3 ), expanded as a sum of scaled columns.
+    const Vector3 invTrans = Vector3( ( -( ( row0 * trans.getX() ) + ( ( row1 * trans.getY() ) + ( row2 * trans.getZ() ) ) ) ) );
+    return Transform3( row0, row1, row2, invTrans );
+}
+
+// Applies the Vector3 absPerElem to every column of tfrm, translation column included.
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
+{
+    const float vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();
+    const float rx = ( ( ( mCol0.getX() * vx ) + ( mCol1.getX() * vy ) ) + ( mCol2.getX() * vz ) );
+    const float ry = ( ( ( mCol0.getY() * vx ) + ( mCol1.getY() * vy ) ) + ( mCol2.getY() * vz ) );
+    const float rz = ( ( ( mCol0.getZ() * vx ) + ( mCol1.getZ() * vy ) ) + ( mCol2.getZ() * vz ) );
+    return Vector3( rx, ry, rz );  // mCol3 is not used: only the 3x3 part acts on a Vector3
+}
+
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const
+{
+    const float px = pnt.getX(), py = pnt.getY(), pz = pnt.getZ();
+    const float rx = ( ( ( ( mCol0.getX() * px ) + ( mCol1.getX() * py ) ) + ( mCol2.getX() * pz ) ) + mCol3.getX() );
+    const float ry = ( ( ( ( mCol0.getY() * px ) + ( mCol1.getY() * py ) ) + ( mCol2.getY() * pz ) ) + mCol3.getY() );
+    const float rz = ( ( ( ( mCol0.getZ() * px ) + ( mCol1.getZ() * py ) ) + ( mCol2.getZ() * pz ) ) + mCol3.getZ() );
+    return Point3( rx, ry, rz );  // 3x3 part applied to the point, then the translation column added
+}
+
+// Composition: tfrm's first three columns go through the Vector3 overload, its translation through the Point3 overload.
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Vector3 prod0 = ( *this * tfrm.mCol0 );
+    const Vector3 prod1 = ( *this * tfrm.mCol1 );
+    const Vector3 prod2 = ( *this * tfrm.mCol2 );
+    const Vector3 prod3 = Vector3( ( *this * Point3( tfrm.mCol3 ) ) );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    this->operator =( this->operator *( tfrm ) );  // right-multiply by tfrm, then store back into this transform
+    return *this;
+}
+
+// Element-wise product of two transforms, delegated column by column to the Vector3 mulPerElem.
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+// Identity transform: the three Vector3 axis constants plus a translation built from 0.0f.
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 unitX = Vector3::xAxis( );
+    const Vector3 unitY = Vector3::yAxis( );
+    const Vector3 unitZ = Vector3::zAxis( );
+    const Vector3 noOffset = Vector3( 0.0f );
+    return Transform3( unitX, unitY, unitZ, noOffset );
+}
+
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )  // overwrites the 3x3 part; returns *this
+{
+    mCol0 = tfrm.getCol0();  // columns 0-2 are replaced wholesale
+    mCol1 = tfrm.getCol1();
+    mCol2 = tfrm.getCol2();
+    return *this;  // mCol3 (the translation) is not touched
+}
+
+inline const Matrix3 Transform3::getUpper3x3( ) const  // the 3x3 part, without the translation
+{
+    return Matrix3( mCol0, mCol1, mCol2 );  // mCol3 is left out
+}
+
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )  // returns *this
+{
+    mCol3 = translateVec;  // the translation is stored as the fourth column
+    return *this;
+}
+
+inline const Vector3 Transform3::getTranslation( ) const  // counterpart of setTranslation
+{
+    return mCol3;  // the fourth column, returned by value
+}
+
+// Rotation about the x axis with no translation; the angle goes straight to sinf/cosf, so it is in radians.
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    // Column 0 is the plain x axis; column 3 (the translation) is built from 0.0f.
+    const Vector3 col0 = Vector3::xAxis( );
+    const Vector3 col1 = Vector3( 0.0f, cosA, sinA );
+    const Vector3 col2 = Vector3( 0.0f, -sinA, cosA );
+    const Vector3 col3 = Vector3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Rotation about the y axis with no translation; the angle goes straight to sinf/cosf, so it is in radians.
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    // Column 1 is the plain y axis; column 3 (the translation) is built from 0.0f.
+    const Vector3 col0 = Vector3( cosA, 0.0f, -sinA );
+    const Vector3 col1 = Vector3::yAxis( );
+    const Vector3 col2 = Vector3( sinA, 0.0f, cosA );
+    const Vector3 col3 = Vector3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Rotation about the z axis with no translation; the angle goes straight to sinf/cosf, so it is in radians.
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    // Column 2 is the plain z axis; column 3 (the translation) is built from 0.0f.
+    const Vector3 col0 = Vector3( cosA, sinA, 0.0f );
+    const Vector3 col1 = Vector3( -sinA, cosA, 0.0f );
+    const Vector3 col2 = Vector3::zAxis( );
+    const Vector3 col3 = Vector3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Combined rotation from three angles: the x, y, z components of radiansXYZ feed sinf/cosf.
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    // Products shared by the second and third columns.
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    // Columns 0-2 carry the rotation; the translation column is built from 0.0f.
+    const Vector3 col0 = Vector3( ( cosZ * cosY ), ( sinZ * cosY ), -sinY );
+    const Vector3 col1 = Vector3( ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ) );
+    const Vector3 col2 = Vector3( ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ) );
+    return Transform3( col0, col1, col2, Vector3( 0.0f ) );
+}
+
+inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec )  // axis-angle rotation, no translation
+{
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );  // 3x3 part from Matrix3::rotation; translation built from 0.0f
+}
+
+inline const Transform3 Transform3::rotation( const Quat & unitQuat )  // rotation from a quaternion, no translation
+{
+    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );  // 3x3 part from the Matrix3( Quat ) conversion; translation built from 0.0f
+}
+
+// Scaling transform: scaleVec supplies the diagonal of the 3x3 part; the translation is built from 0.0f.
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{
+    const Vector3 col0 = Vector3( scaleVec.getX(), 0.0f, 0.0f );
+    const Vector3 col1 = Vector3( 0.0f, scaleVec.getY(), 0.0f );
+    const Vector3 col2 = Vector3( 0.0f, 0.0f, scaleVec.getZ() );
+    const Vector3 noOffset = Vector3( 0.0f );
+    return Transform3( col0, col1, col2, noOffset );
+}
+
+// Scales the columns of tfrm: column 0 by x, column 1 by y, column 2 by z; the translation is passed through.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{
+    const Vector3 scaled0 = ( tfrm.getCol0() * scaleVec.getX( ) );
+    const Vector3 scaled1 = ( tfrm.getCol1() * scaleVec.getY( ) );
+    const Vector3 scaled2 = ( tfrm.getCol2() * scaleVec.getZ( ) );
+    const Vector3 offset = tfrm.getCol3();
+    return Transform3( scaled0, scaled1, scaled2, offset );
+}
+
+// Scales the rows of tfrm: every column, translation included, is multiplied element-wise by scaleVec.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 scaled0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 scaled3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Pure translation: the three Vector3 axis constants as the 3x3 part,
+// with translateVec stored unchanged as the fourth column.
+inline const Transform3 Transform3::translation( const Vector3 & translateVec )
+{
+    const Vector3 unitX = Vector3::xAxis( );
+    const Vector3 unitY = Vector3::yAxis( );
+    const Vector3 unitZ = Vector3::zAxis( );
+    return Transform3( unitX, unitY, unitZ, translateVec );
+}
+
+// Chooses between tfrm0 and tfrm1 by forwarding select1 to the Vector3 select for each column.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    const Vector3 picked0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 picked1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 picked2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 picked3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Transform3 & tfrm )
+{
+    for ( int row = 0; row < 3; ++row ) {  // three rows, each printed as a Vector4
+        print( tfrm.getRow( row ) );
+    }
+}
+
+inline void print( const Transform3 & tfrm, const char * name )  // labelled variant of print( tfrm )
+{
+    printf("%s:\n", name);  // label line first; name is formatted with %s, so it must be a valid C string
+    print( tfrm );  // then the three rows
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm )  // builds the quaternion from a 3x3 matrix (presumably a pure rotation - nothing here checks that)
+{
+    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    int negTrace, ZgtX, ZgtY, YgtX;
+    int largestXorY, largestYorZ, largestZorX;
+    // Element naming below: first letter is the row, second letter is the column.
+    xx = tfrm.getCol0().getX();
+    yx = tfrm.getCol0().getY();
+    zx = tfrm.getCol0().getZ();
+    xy = tfrm.getCol1().getX();
+    yy = tfrm.getCol1().getY();
+    zy = tfrm.getCol1().getZ();
+    xz = tfrm.getCol2().getX();
+    yz = tfrm.getCol2().getY();
+    zz = tfrm.getCol2().getZ();
+
+    trace = ( ( xx + yy ) + zz );
+    // All three flags below are forced to 0 unless the trace is negative.
+    negTrace = ( trace < 0.0f );
+    ZgtX = zz > xx;
+    ZgtY = zz > yy;
+    YgtX = yy > xx;
+    largestXorY = ( !ZgtX || !ZgtY ) && negTrace;  // zz is not strictly greater than both xx and yy
+    largestYorZ = ( YgtX || ZgtX ) && negTrace;    // xx is exceeded by yy or by zz
+    largestZorX = ( ZgtY || !YgtX ) && negTrace;   // yy is exceeded by zz, or does not exceed xx
+    // Each set flag negates one diagonal and one off-diagonal element before the shared formula runs.
+    if ( largestXorY )
+    {
+        zz = -zz;
+        xy = -xy;
+    }
+    if ( largestYorZ )
+    {
+        xx = -xx;
+        yz = -yz;
+    }
+    if ( largestZorX )
+    {
+        yy = -yy;
+        zx = -zx;
+    }
+
+    radicand = ( ( ( xx + yy ) + zz ) + 1.0f );  // uses the possibly sign-flipped diagonal
+    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );  // no guard on radicand here
+
+    tmpx = ( ( zy - yz ) * scale );
+    tmpy = ( ( xz - zx ) * scale );
+    tmpz = ( ( yx - xy ) * scale );
+    tmpw = ( radicand * scale );  // equals 0.5f * sqrtf( radicand )
+    qx = tmpx;
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+    // Reorder the components to match whichever sign flips were applied above.
+    if ( largestXorY )
+    {
+        qx = tmpw;  // full reversal: ( x, y, z, w ) <- ( w, z, y, x )
+        qy = tmpz;
+        qz = tmpy;
+        qw = tmpx;
+    }
+    if ( largestYorZ )
+    {
+        tmpx = qx;  // tmpx/tmpz are reused as scratch for the swaps x <-> y and z <-> w
+        tmpz = qz;
+        qx = qy;
+        qy = tmpx;
+        qz = qw;
+        qw = tmpz;
+    }
+
+    mX = qx;
+    mY = qy;
+    mZ = qz;
+    mW = qw;
+}
+
+// Outer product: column i of the result is tfrm0 scaled by component i of tfrm1.
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{
+    const Vector3 col0 = ( tfrm0 * tfrm1.getX( ) );
+    const Vector3 col1 = ( tfrm0 * tfrm1.getY( ) );
+    const Vector3 col2 = ( tfrm0 * tfrm1.getZ( ) );
+    return Matrix3( col0, col1, col2 );
+}
+
+// Outer product: column i of the result is tfrm0 scaled by component i of tfrm1.
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{
+    const Vector4 col0 = ( tfrm0 * tfrm1.getX( ) );
+    const Vector4 col1 = ( tfrm0 * tfrm1.getY( ) );
+    const Vector4 col2 = ( tfrm0 * tfrm1.getZ( ) );
+    const Vector4 col3 = ( tfrm0 * tfrm1.getW( ) );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const float vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();
+    const float r0 = ( ( ( vx * mat.getCol0().getX() ) + ( vy * mat.getCol0().getY() ) ) + ( vz * mat.getCol0().getZ() ) );
+    const float r1 = ( ( ( vx * mat.getCol1().getX() ) + ( vy * mat.getCol1().getY() ) ) + ( vz * mat.getCol1().getZ() ) );
+    const float r2 = ( ( ( vx * mat.getCol2().getX() ) + ( vy * mat.getCol2().getY() ) ) + ( vz * mat.getCol2().getZ() ) );
+    return Vector3( r0, r1, r2 );  // component i is vec dotted with column i, i.e. a row vector times the matrix
+}
+
+// Skew-symmetric matrix of vec, arranged so that multiplying it by a vector v gives cross( vec, v ).
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{
+    const Vector3 col0 = Vector3( 0.0f, vec.getZ(), -vec.getY() );
+    const Vector3 col1 = Vector3( -vec.getZ(), 0.0f, vec.getX() );
+    const Vector3 col2 = Vector3( vec.getY(), -vec.getX(), 0.0f );
+    return Matrix3( col0, col1, col2 );
+}
+
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )  // same result as crossMatrix( vec ) * mat, computed directly
+{
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );  // cross vec with each column of mat
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/quat_aos.h
index 603f522d9..7f1e8822b 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/quat_aos.h
@@ -1,432 +1,432 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_CPP_H
-#define _VECTORMATH_QUAT_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Quat::Quat( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-}
-
-inline Quat::Quat( float _x, float _y, float _z, float _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Quat::Quat( const Vector3 & xyz, float _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Quat::Quat( const Vector4 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = vec.getW();
-}
-
-inline Quat::Quat( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 )
-{
-    Quat start;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitQuat0, unitQuat1 );
-    if ( cosAngle < 0.0f ) {
-        cosAngle = -cosAngle;
-        start = ( -unitQuat0 );
-    } else {
-        start = unitQuat0;
-    }
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
-}
-
-inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
-}
-
-inline Quat & Quat::operator =( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Quat & Quat::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Quat::getX( ) const
-{
-    return mX;
-}
-
-inline Quat & Quat::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Quat::getY( ) const
-{
-    return mY;
-}
-
-inline Quat & Quat::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Quat::getZ( ) const
-{
-    return mZ;
-}
-
-inline Quat & Quat::setW( float _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline float Quat::getW( ) const
-{
-    return mW;
-}
-
-inline Quat & Quat::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Quat::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Quat::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Quat::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Quat Quat::operator +( const Quat & quat ) const
-{
-    return Quat(
-        ( mX + quat.mX ),
-        ( mY + quat.mY ),
-        ( mZ + quat.mZ ),
-        ( mW + quat.mW )
-    );
-}
-
-inline const Quat Quat::operator -( const Quat & quat ) const
-{
-    return Quat(
-        ( mX - quat.mX ),
-        ( mY - quat.mY ),
-        ( mZ - quat.mZ ),
-        ( mW - quat.mW )
-    );
-}
-
-inline const Quat Quat::operator *( float scalar ) const
-{
-    return Quat(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar ),
-        ( mW * scalar )
-    );
-}
-
-inline Quat & Quat::operator +=( const Quat & quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( const Quat & quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( float scalar ) const
-{
-    return Quat(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar ),
-        ( mW / scalar )
-    );
-}
-
-inline Quat & Quat::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat(
-        -mX,
-        -mY,
-        -mZ,
-        -mW
-    );
-}
-
-inline const Quat operator *( float scalar, const Quat & quat )
-{
-    return quat * scalar;
-}
-
-inline float dot( const Quat & quat0, const Quat & quat1 )
-{
-    float result;
-    result = ( quat0.getX() * quat1.getX() );
-    result = ( result + ( quat0.getY() * quat1.getY() ) );
-    result = ( result + ( quat0.getZ() * quat1.getZ() ) );
-    result = ( result + ( quat0.getW() * quat1.getW() ) );
-    return result;
-}
-
-inline float norm( const Quat & quat )
-{
-    float result;
-    result = ( quat.getX() * quat.getX() );
-    result = ( result + ( quat.getY() * quat.getY() ) );
-    result = ( result + ( quat.getZ() * quat.getZ() ) );
-    result = ( result + ( quat.getW() * quat.getW() ) );
-    return result;
-}
-
-inline float length( const Quat & quat )
-{
-    return sqrtf( norm( quat ) );
-}
-
-inline const Quat normalize( const Quat & quat )
-{
-    float lenSqr, lenInv;
-    lenSqr = norm( quat );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Quat(
-        ( quat.getX() * lenInv ),
-        ( quat.getY() * lenInv ),
-        ( quat.getZ() * lenInv ),
-        ( quat.getW() * lenInv )
-    );
-}
-
-inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    float cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
-    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) );
-}
-
-inline const Quat Quat::rotation( float radians, const Vector3 & unitVec )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( ( unitVec * s ), c );
-}
-
-inline const Quat Quat::rotationX( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( s, 0.0f, 0.0f, c );
-}
-
-inline const Quat Quat::rotationY( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( 0.0f, s, 0.0f, c );
-}
-
-inline const Quat Quat::rotationZ( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( 0.0f, 0.0f, s, c );
-}
-
-inline const Quat Quat::operator *( const Quat & quat ) const
-{
-    return Quat(
-        ( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ),
-        ( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ),
-        ( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ),
-        ( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) )
-    );
-}
-
-inline Quat & Quat::operator *=( const Quat & quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) );
-    tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) );
-    tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) );
-    tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) );
-    return Vector3(
-        ( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ),
-        ( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ),
-        ( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) )
-    );
-}
-
-inline const Quat conj( const Quat & quat )
-{
-    return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() );
-}
-
-inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 )
-{
-    return Quat(
-        ( select1 )? quat1.getX() : quat0.getX(),
-        ( select1 )? quat1.getY() : quat0.getY(),
-        ( select1 )? quat1.getZ() : quat0.getZ(),
-        ( select1 )? quat1.getW() : quat0.getW()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Quat & quat )
-{
-    printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
-}
-
-inline void print( const Quat & quat, const char * name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Quat::Quat( const Quat & quat )
+{
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+}
+
+inline Quat::Quat( float _x, float _y, float _z, float _w )
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Quat::Quat( const Vector3 & xyz, float _w )
+{
+    this->setXYZ( xyz );
+    this->setW( _w );
+}
+
+inline Quat::Quat( const Vector4 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = vec.getW();
+}
+
+inline Quat::Quat( float scalar )
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline const Quat Quat::identity( )
+{
+    return Quat( 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 )
+{
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 )
+{
+    Quat start;
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = dot( unitQuat0, unitQuat1 );
+    if ( cosAngle < 0.0f ) {
+        cosAngle = -cosAngle;
+        start = ( -unitQuat0 );
+    } else {
+        start = unitQuat0;
+    }
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        angle = acosf( cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) );
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
+}
+
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
+{
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 );
+    tmp1 = slerp( t, unitQuat1, unitQuat2 );
+    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
+}
+
+inline Quat & Quat::operator =( const Quat & quat )
+{
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( const Vector3 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{
+    return Vector3( mX, mY, mZ );
+}
+
+inline Quat & Quat::setX( float _x )
+{
+    mX = _x;
+    return *this;
+}
+
+inline float Quat::getX( ) const
+{
+    return mX;
+}
+
+inline Quat & Quat::setY( float _y )
+{
+    mY = _y;
+    return *this;
+}
+
+inline float Quat::getY( ) const
+{
+    return mY;
+}
+
+inline Quat & Quat::setZ( float _z )
+{
+    mZ = _z;
+    return *this;
+}
+
+inline float Quat::getZ( ) const
+{
+    return mZ;
+}
+
+inline Quat & Quat::setW( float _w )
+{
+    mW = _w;
+    return *this;
+}
+
+inline float Quat::getW( ) const
+{
+    return mW;
+}
+
+inline Quat & Quat::setElem( int idx, float value )
+{
+    *(&mX + idx) = value;
+    return *this;
+}
+
+inline float Quat::getElem( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline float & Quat::operator []( int idx )
+{
+    return *(&mX + idx);
+}
+
+inline float Quat::operator []( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline const Quat Quat::operator +( const Quat & quat ) const
+{
+    return Quat(
+        ( mX + quat.mX ),
+        ( mY + quat.mY ),
+        ( mZ + quat.mZ ),
+        ( mW + quat.mW )
+    );
+}
+
+inline const Quat Quat::operator -( const Quat & quat ) const
+{
+    return Quat(
+        ( mX - quat.mX ),
+        ( mY - quat.mY ),
+        ( mZ - quat.mZ ),
+        ( mW - quat.mW )
+    );
+}
+
+inline const Quat Quat::operator *( float scalar ) const
+{
+    return Quat(
+        ( mX * scalar ),
+        ( mY * scalar ),
+        ( mZ * scalar ),
+        ( mW * scalar )
+    );
+}
+
+inline Quat & Quat::operator +=( const Quat & quat )
+{
+    *this = *this + quat;
+    return *this;
+}
+
+inline Quat & Quat::operator -=( const Quat & quat )
+{
+    *this = *this - quat;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator /( float scalar ) const
+{
+    return Quat(
+        ( mX / scalar ),
+        ( mY / scalar ),
+        ( mZ / scalar ),
+        ( mW / scalar )
+    );
+}
+
+inline Quat & Quat::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const
+{
+    return Quat(
+        -mX,
+        -mY,
+        -mZ,
+        -mW
+    );
+}
+
+inline const Quat operator *( float scalar, const Quat & quat )
+{
+    return quat * scalar;
+}
+
+inline float dot( const Quat & quat0, const Quat & quat1 )
+{
+    float result;
+    result = ( quat0.getX() * quat1.getX() );
+    result = ( result + ( quat0.getY() * quat1.getY() ) );
+    result = ( result + ( quat0.getZ() * quat1.getZ() ) );
+    result = ( result + ( quat0.getW() * quat1.getW() ) );
+    return result;
+}
+
+inline float norm( const Quat & quat )
+{
+    float result;
+    result = ( quat.getX() * quat.getX() );
+    result = ( result + ( quat.getY() * quat.getY() ) );
+    result = ( result + ( quat.getZ() * quat.getZ() ) );
+    result = ( result + ( quat.getW() * quat.getW() ) );
+    return result;
+}
+
+inline float length( const Quat & quat )
+{
+    return sqrtf( norm( quat ) );
+}
+
+inline const Quat normalize( const Quat & quat )
+{
+    float lenSqr, lenInv;
+    lenSqr = norm( quat );
+    lenInv = ( 1.0f / sqrtf( lenSqr ) );
+    return Quat(
+        ( quat.getX() * lenInv ),
+        ( quat.getY() * lenInv ),
+        ( quat.getZ() * lenInv ),
+        ( quat.getW() * lenInv )
+    );
+}
+
+inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    float cosHalfAngleX2, recipCosHalfAngleX2;
+    cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) );
+    recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
+    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) );
+}
+
+inline const Quat Quat::rotation( float radians, const Vector3 & unitVec )
+{
+    float s, c, angle;
+    angle = ( radians * 0.5f );
+    s = sinf( angle );
+    c = cosf( angle );
+    return Quat( ( unitVec * s ), c );
+}
+
+inline const Quat Quat::rotationX( float radians )
+{
+    float s, c, angle;
+    angle = ( radians * 0.5f );
+    s = sinf( angle );
+    c = cosf( angle );
+    return Quat( s, 0.0f, 0.0f, c );
+}
+
+inline const Quat Quat::rotationY( float radians )
+{
+    float s, c, angle;
+    angle = ( radians * 0.5f );
+    s = sinf( angle );
+    c = cosf( angle );
+    return Quat( 0.0f, s, 0.0f, c );
+}
+
+inline const Quat Quat::rotationZ( float radians )
+{
+    float s, c, angle;
+    angle = ( radians * 0.5f );
+    s = sinf( angle );
+    c = cosf( angle );
+    return Quat( 0.0f, 0.0f, s, c );
+}
+
+inline const Quat Quat::operator *( const Quat & quat ) const
+{
+    return Quat(
+        ( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ),
+        ( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ),
+        ( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ),
+        ( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) )
+    );
+}
+
+inline Quat & Quat::operator *=( const Quat & quat )
+{
+    *this = *this * quat;
+    return *this;
+}
+
+inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
+{
+    float tmpX, tmpY, tmpZ, tmpW;
+    tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) );
+    tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) );
+    tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) );
+    tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) );
+    return Vector3(
+        ( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ),
+        ( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ),
+        ( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) )
+    );
+}
+
+inline const Quat conj( const Quat & quat )
+{
+    return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() );
+}
+
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 )
+{
+    return Quat(
+        ( select1 )? quat1.getX() : quat0.getX(),
+        ( select1 )? quat1.getY() : quat0.getY(),
+        ( select1 )? quat1.getZ() : quat0.getZ(),
+        ( select1 )? quat1.getW() : quat0.getW()
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat & quat )
+{
+    printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
+}
+
+inline void print( const Quat & quat, const char * name )
+{
+    printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vec_aos.h
index 6b1b8cee3..a1a75333a 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vec_aos.h
@@ -1,1173 +1,1173 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_CPP_H
-#define _VECTORMATH_VEC_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Vector3::Vector3( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-}
-
-inline Vector3::Vector3( float _x, float _y, float _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Vector3::Vector3( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-}
-
-inline Vector3::Vector3( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( 1.0f, 0.0f, 0.0f );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( 0.0f, 1.0f, 0.0f );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( 0.0f, 0.0f, 1.0f );
-}
-
-inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline Vector3 & Vector3::operator =( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Vector3::getX( ) const
-{
-    return mX;
-}
-
-inline Vector3 & Vector3::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Vector3::getY( ) const
-{
-    return mY;
-}
-
-inline Vector3 & Vector3::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Vector3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector3 & Vector3::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Vector3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Vector3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Vector3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( mX + vec.mX ),
-        ( mY + vec.mY ),
-        ( mZ + vec.mZ )
-    );
-}
-
-inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( mX - vec.mX ),
-        ( mY - vec.mY ),
-        ( mZ - vec.mZ )
-    );
-}
-
-inline const Point3 Vector3::operator +( const Point3 & pnt ) const
-{
-    return Point3(
-        ( mX + pnt.getX() ),
-        ( mY + pnt.getY() ),
-        ( mZ + pnt.getZ() )
-    );
-}
-
-inline const Vector3 Vector3::operator *( float scalar ) const
-{
-    return Vector3(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( float scalar ) const
-{
-    return Vector3(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3(
-        -mX,
-        -mY,
-        -mZ
-    );
-}
-
-inline const Vector3 operator *( float scalar, const Vector3 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec0.getX() * vec1.getX() ),
-        ( vec0.getY() * vec1.getY() ),
-        ( vec0.getZ() * vec1.getZ() )
-    );
-}
-
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec0.getX() / vec1.getX() ),
-        ( vec0.getY() / vec1.getY() ),
-        ( vec0.getZ() / vec1.getZ() )
-    );
-}
-
-inline const Vector3 recipPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        ( 1.0f / vec.getX() ),
-        ( 1.0f / vec.getY() ),
-        ( 1.0f / vec.getZ() )
-    );
-}
-
-inline const Vector3 sqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        sqrtf( vec.getX() ),
-        sqrtf( vec.getY() ),
-        sqrtf( vec.getZ() )
-    );
-}
-
-inline const Vector3 rsqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        ( 1.0f / sqrtf( vec.getX() ) ),
-        ( 1.0f / sqrtf( vec.getY() ) ),
-        ( 1.0f / sqrtf( vec.getZ() ) )
-    );
-}
-
-inline const Vector3 absPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        fabsf( vec.getX() ),
-        fabsf( vec.getY() ),
-        fabsf( vec.getZ() )
-    );
-}
-
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
-        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
-        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() )
-    );
-}
-
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ()
-    );
-}
-
-inline float maxElem( const Vector3 & vec )
-{
-    float result;
-    result = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() > result)? vec.getZ() : result;
-    return result;
-}
-
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ()
-    );
-}
-
-inline float minElem( const Vector3 & vec )
-{
-    float result;
-    result = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() < result)? vec.getZ() : result;
-    return result;
-}
-
-inline float sum( const Vector3 & vec )
-{
-    float result;
-    result = ( vec.getX() + vec.getY() );
-    result = ( result + vec.getZ() );
-    return result;
-}
-
-inline float dot( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    float result;
-    result = ( vec0.getX() * vec1.getX() );
-    result = ( result + ( vec0.getY() * vec1.getY() ) );
-    result = ( result + ( vec0.getZ() * vec1.getZ() ) );
-    return result;
-}
-
-inline float lengthSqr( const Vector3 & vec )
-{
-    float result;
-    result = ( vec.getX() * vec.getX() );
-    result = ( result + ( vec.getY() * vec.getY() ) );
-    result = ( result + ( vec.getZ() * vec.getZ() ) );
-    return result;
-}
-
-inline float length( const Vector3 & vec )
-{
-    return sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( const Vector3 & vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Vector3(
-        ( vec.getX() * lenInv ),
-        ( vec.getY() * lenInv ),
-        ( vec.getZ() * lenInv )
-    );
-}
-
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() ) ),
-        ( ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() ) ),
-        ( ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() ) )
-    );
-}
-
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 )
-{
-    return Vector3(
-        ( select1 )? vec1.getX() : vec0.getX(),
-        ( select1 )? vec1.getY() : vec0.getY(),
-        ( select1 )? vec1.getZ() : vec0.getZ()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector3 & vec )
-{
-    printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() );
-}
-
-inline void print( const Vector3 & vec, const char * name )
-{
-    printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() );
-}
-
-#endif
-
-inline Vector4::Vector4( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-}
-
-inline Vector4::Vector4( float _x, float _y, float _z, float _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Vector4::Vector4( const Vector3 & xyz, float _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Vector4::Vector4( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = 0.0f;
-}
-
-inline Vector4::Vector4( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-    mW = 1.0f;
-}
-
-inline Vector4::Vector4( const Quat & quat )
-{
-    mX = quat.getX();
-    mY = quat.getY();
-    mZ = quat.getZ();
-    mW = quat.getW();
-}
-
-inline Vector4::Vector4( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( 1.0f, 0.0f, 0.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( 0.0f, 1.0f, 0.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( 0.0f, 0.0f, 1.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
-{
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline Vector4 & Vector4::operator =( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Vector4 & Vector4::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Vector4::getX( ) const
-{
-    return mX;
-}
-
-inline Vector4 & Vector4::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Vector4::getY( ) const
-{
-    return mY;
-}
-
-inline Vector4 & Vector4::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Vector4::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector4 & Vector4::setW( float _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline float Vector4::getW( ) const
-{
-    return mW;
-}
-
-inline Vector4 & Vector4::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Vector4::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Vector4::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Vector4::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( mX + vec.mX ),
-        ( mY + vec.mY ),
-        ( mZ + vec.mZ ),
-        ( mW + vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( mX - vec.mX ),
-        ( mY - vec.mY ),
-        ( mZ - vec.mZ ),
-        ( mW - vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator *( float scalar ) const
-{
-    return Vector4(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar ),
-        ( mW * scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator +=( const Vector4 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( const Vector4 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( float scalar ) const
-{
-    return Vector4(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar ),
-        ( mW / scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4(
-        -mX,
-        -mY,
-        -mZ,
-        -mW
-    );
-}
-
-inline const Vector4 operator *( float scalar, const Vector4 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec0.getX() * vec1.getX() ),
-        ( vec0.getY() * vec1.getY() ),
-        ( vec0.getZ() * vec1.getZ() ),
-        ( vec0.getW() * vec1.getW() )
-    );
-}
-
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec0.getX() / vec1.getX() ),
-        ( vec0.getY() / vec1.getY() ),
-        ( vec0.getZ() / vec1.getZ() ),
-        ( vec0.getW() / vec1.getW() )
-    );
-}
-
-inline const Vector4 recipPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        ( 1.0f / vec.getX() ),
-        ( 1.0f / vec.getY() ),
-        ( 1.0f / vec.getZ() ),
-        ( 1.0f / vec.getW() )
-    );
-}
-
-inline const Vector4 sqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        sqrtf( vec.getX() ),
-        sqrtf( vec.getY() ),
-        sqrtf( vec.getZ() ),
-        sqrtf( vec.getW() )
-    );
-}
-
-inline const Vector4 rsqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        ( 1.0f / sqrtf( vec.getX() ) ),
-        ( 1.0f / sqrtf( vec.getY() ) ),
-        ( 1.0f / sqrtf( vec.getZ() ) ),
-        ( 1.0f / sqrtf( vec.getW() ) )
-    );
-}
-
-inline const Vector4 absPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        fabsf( vec.getX() ),
-        fabsf( vec.getY() ),
-        fabsf( vec.getZ() ),
-        fabsf( vec.getW() )
-    );
-}
-
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
-        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
-        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ),
-        ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() )
-    );
-}
-
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(),
-        (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW()
-    );
-}
-
-inline float maxElem( const Vector4 & vec )
-{
-    float result;
-    result = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() > result)? vec.getZ() : result;
-    result = (vec.getW() > result)? vec.getW() : result;
-    return result;
-}
-
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(),
-        (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW()
-    );
-}
-
-inline float minElem( const Vector4 & vec )
-{
-    float result;
-    result = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() < result)? vec.getZ() : result;
-    result = (vec.getW() < result)? vec.getW() : result;
-    return result;
-}
-
-inline float sum( const Vector4 & vec )
-{
-    float result;
-    result = ( vec.getX() + vec.getY() );
-    result = ( result + vec.getZ() );
-    result = ( result + vec.getW() );
-    return result;
-}
-
-inline float dot( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    float result;
-    result = ( vec0.getX() * vec1.getX() );
-    result = ( result + ( vec0.getY() * vec1.getY() ) );
-    result = ( result + ( vec0.getZ() * vec1.getZ() ) );
-    result = ( result + ( vec0.getW() * vec1.getW() ) );
-    return result;
-}
-
-inline float lengthSqr( const Vector4 & vec )
-{
-    float result;
-    result = ( vec.getX() * vec.getX() );
-    result = ( result + ( vec.getY() * vec.getY() ) );
-    result = ( result + ( vec.getZ() * vec.getZ() ) );
-    result = ( result + ( vec.getW() * vec.getW() ) );
-    return result;
-}
-
-inline float length( const Vector4 & vec )
-{
-    return sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( const Vector4 & vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Vector4(
-        ( vec.getX() * lenInv ),
-        ( vec.getY() * lenInv ),
-        ( vec.getZ() * lenInv ),
-        ( vec.getW() * lenInv )
-    );
-}
-
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 )
-{
-    return Vector4(
-        ( select1 )? vec1.getX() : vec0.getX(),
-        ( select1 )? vec1.getY() : vec0.getY(),
-        ( select1 )? vec1.getZ() : vec0.getZ(),
-        ( select1 )? vec1.getW() : vec0.getW()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector4 & vec )
-{
-    printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
-}
-
-inline void print( const Vector4 & vec, const char * name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
-}
-
-#endif
-
-inline Point3::Point3( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-}
-
-inline Point3::Point3( float _x, float _y, float _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Point3::Point3( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-}
-
-inline Point3::Point3( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline Point3 & Point3::operator =( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-    return *this;
-}
-
-inline Point3 & Point3::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Point3::getX( ) const
-{
-    return mX;
-}
-
-inline Point3 & Point3::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Point3::getY( ) const
-{
-    return mY;
-}
-
-inline Point3 & Point3::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Point3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Point3 & Point3::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Point3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Point3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Point3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Point3::operator -( const Point3 & pnt ) const
-{
-    return Vector3(
-        ( mX - pnt.mX ),
-        ( mY - pnt.mY ),
-        ( mZ - pnt.mZ )
-    );
-}
-
-inline const Point3 Point3::operator +( const Vector3 & vec ) const
-{
-    return Point3(
-        ( mX + vec.getX() ),
-        ( mY + vec.getY() ),
-        ( mZ + vec.getZ() )
-    );
-}
-
-inline const Point3 Point3::operator -( const Vector3 & vec ) const
-{
-    return Point3(
-        ( mX - vec.getX() ),
-        ( mY - vec.getY() ),
-        ( mZ - vec.getZ() )
-    );
-}
-
-inline Point3 & Point3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt0.getX() * pnt1.getX() ),
-        ( pnt0.getY() * pnt1.getY() ),
-        ( pnt0.getZ() * pnt1.getZ() )
-    );
-}
-
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt0.getX() / pnt1.getX() ),
-        ( pnt0.getY() / pnt1.getY() ),
-        ( pnt0.getZ() / pnt1.getZ() )
-    );
-}
-
-inline const Point3 recipPerElem( const Point3 & pnt )
-{
-    return Point3(
-        ( 1.0f / pnt.getX() ),
-        ( 1.0f / pnt.getY() ),
-        ( 1.0f / pnt.getZ() )
-    );
-}
-
-inline const Point3 sqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        sqrtf( pnt.getX() ),
-        sqrtf( pnt.getY() ),
-        sqrtf( pnt.getZ() )
-    );
-}
-
-inline const Point3 rsqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        ( 1.0f / sqrtf( pnt.getX() ) ),
-        ( 1.0f / sqrtf( pnt.getY() ) ),
-        ( 1.0f / sqrtf( pnt.getZ() ) )
-    );
-}
-
-inline const Point3 absPerElem( const Point3 & pnt )
-{
-    return Point3(
-        fabsf( pnt.getX() ),
-        fabsf( pnt.getY() ),
-        fabsf( pnt.getZ() )
-    );
-}
-
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt1.getX() < 0.0f )? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() ),
-        ( pnt1.getY() < 0.0f )? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() ),
-        ( pnt1.getZ() < 0.0f )? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() )
-    );
-}
-
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        (pnt0.getX() > pnt1.getX())? pnt0.getX() : pnt1.getX(),
-        (pnt0.getY() > pnt1.getY())? pnt0.getY() : pnt1.getY(),
-        (pnt0.getZ() > pnt1.getZ())? pnt0.getZ() : pnt1.getZ()
-    );
-}
-
-inline float maxElem( const Point3 & pnt )
-{
-    float result;
-    result = (pnt.getX() > pnt.getY())? pnt.getX() : pnt.getY();
-    result = (pnt.getZ() > result)? pnt.getZ() : result;
-    return result;
-}
-
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        (pnt0.getX() < pnt1.getX())? pnt0.getX() : pnt1.getX(),
-        (pnt0.getY() < pnt1.getY())? pnt0.getY() : pnt1.getY(),
-        (pnt0.getZ() < pnt1.getZ())? pnt0.getZ() : pnt1.getZ()
-    );
-}
-
-inline float minElem( const Point3 & pnt )
-{
-    float result;
-    result = (pnt.getX() < pnt.getY())? pnt.getX() : pnt.getY();
-    result = (pnt.getZ() < result)? pnt.getZ() : result;
-    return result;
-}
-
-inline float sum( const Point3 & pnt )
-{
-    float result;
-    result = ( pnt.getX() + pnt.getY() );
-    result = ( result + pnt.getZ() );
-    return result;
-}
-
-inline const Point3 scale( const Point3 & pnt, float scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline float projection( const Point3 & pnt, const Vector3 & unitVec )
-{
-    float result;
-    result = ( pnt.getX() * unitVec.getX() );
-    result = ( result + ( pnt.getY() * unitVec.getY() ) );
-    result = ( result + ( pnt.getZ() * unitVec.getZ() ) );
-    return result;
-}
-
-inline float distSqrFromOrigin( const Point3 & pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline float distFromOrigin( const Point3 & pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline float dist( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 )
-{
-    return Point3(
-        ( select1 )? pnt1.getX() : pnt0.getX(),
-        ( select1 )? pnt1.getY() : pnt0.getY(),
-        ( select1 )? pnt1.getZ() : pnt0.getZ()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Point3 & pnt )
-{
-    printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() );
-}
-
-inline void print( const Point3 & pnt, const char * name )
-{
-    printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Vector3::Vector3( const Vector3 & vec )
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+}
+
+inline Vector3::Vector3( float _x, float _y, float _z )
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+}
+
+inline Vector3::Vector3( const Point3 & pnt )
+{
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+}
+
+inline Vector3::Vector3( float scalar )
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+}
+
+inline const Vector3 Vector3::xAxis( )
+{
+    return Vector3( 1.0f, 0.0f, 0.0f );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{
+    return Vector3( 0.0f, 1.0f, 0.0f );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{
+    return Vector3( 0.0f, 0.0f, 1.0f );
+}
+
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = dot( unitVec0, unitVec1 );
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        angle = acosf( cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) );
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline Vector3 & Vector3::operator =( const Vector3 & vec )
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( float _x )
+{
+    mX = _x;
+    return *this;
+}
+
+inline float Vector3::getX( ) const
+{
+    return mX;
+}
+
+inline Vector3 & Vector3::setY( float _y )
+{
+    mY = _y;
+    return *this;
+}
+
+inline float Vector3::getY( ) const
+{
+    return mY;
+}
+
+inline Vector3 & Vector3::setZ( float _z )
+{
+    mZ = _z;
+    return *this;
+}
+
+inline float Vector3::getZ( ) const
+{
+    return mZ;
+}
+
+inline Vector3 & Vector3::setElem( int idx, float value )
+{
+    *(&mX + idx) = value;
+    return *this;
+}
+
+inline float Vector3::getElem( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline float & Vector3::operator []( int idx )
+{
+    return *(&mX + idx);
+}
+
+inline float Vector3::operator []( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
+{
+    return Vector3(
+        ( mX + vec.mX ),
+        ( mY + vec.mY ),
+        ( mZ + vec.mZ )
+    );
+}
+
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
+{
+    return Vector3(
+        ( mX - vec.mX ),
+        ( mY - vec.mY ),
+        ( mZ - vec.mZ )
+    );
+}
+
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const
+{
+    return Point3(
+        ( mX + pnt.getX() ),
+        ( mY + pnt.getY() ),
+        ( mZ + pnt.getZ() )
+    );
+}
+
+inline const Vector3 Vector3::operator *( float scalar ) const
+{
+    return Vector3(
+        ( mX * scalar ),
+        ( mY * scalar ),
+        ( mZ * scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator +=( const Vector3 & vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator -=( const Vector3 & vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator /( float scalar ) const
+{
+    return Vector3(
+        ( mX / scalar ),
+        ( mY / scalar ),
+        ( mZ / scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator -( ) const
+{
+    return Vector3(
+        -mX,
+        -mY,
+        -mZ
+    );
+}
+
+inline const Vector3 operator *( float scalar, const Vector3 & vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        ( vec0.getX() * vec1.getX() ),
+        ( vec0.getY() * vec1.getY() ),
+        ( vec0.getZ() * vec1.getZ() )
+    );
+}
+
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        ( vec0.getX() / vec1.getX() ),
+        ( vec0.getY() / vec1.getY() ),
+        ( vec0.getZ() / vec1.getZ() )
+    );
+}
+
+inline const Vector3 recipPerElem( const Vector3 & vec )
+{
+    return Vector3(
+        ( 1.0f / vec.getX() ),
+        ( 1.0f / vec.getY() ),
+        ( 1.0f / vec.getZ() )
+    );
+}
+
+inline const Vector3 sqrtPerElem( const Vector3 & vec )
+{
+    return Vector3(
+        sqrtf( vec.getX() ),
+        sqrtf( vec.getY() ),
+        sqrtf( vec.getZ() )
+    );
+}
+
+inline const Vector3 rsqrtPerElem( const Vector3 & vec )
+{
+    return Vector3(
+        ( 1.0f / sqrtf( vec.getX() ) ),
+        ( 1.0f / sqrtf( vec.getY() ) ),
+        ( 1.0f / sqrtf( vec.getZ() ) )
+    );
+}
+
+inline const Vector3 absPerElem( const Vector3 & vec )
+{
+    return Vector3(
+        fabsf( vec.getX() ),
+        fabsf( vec.getY() ),
+        fabsf( vec.getZ() )
+    );
+}
+
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
+        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
+        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() )
+    );
+}
+
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
+        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
+        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ()
+    );
+}
+
+inline float maxElem( const Vector3 & vec )
+{
+    float result;
+    result = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
+    result = (vec.getZ() > result)? vec.getZ() : result;
+    return result;
+}
+
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
+        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
+        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ()
+    );
+}
+
+inline float minElem( const Vector3 & vec )
+{
+    float result;
+    result = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
+    result = (vec.getZ() < result)? vec.getZ() : result;
+    return result;
+}
+
+inline float sum( const Vector3 & vec )
+{
+    float result;
+    result = ( vec.getX() + vec.getY() );
+    result = ( result + vec.getZ() );
+    return result;
+}
+
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    float result;
+    result = ( vec0.getX() * vec1.getX() );
+    result = ( result + ( vec0.getY() * vec1.getY() ) );
+    result = ( result + ( vec0.getZ() * vec1.getZ() ) );
+    return result;
+}
+
+inline float lengthSqr( const Vector3 & vec )
+{
+    float result;
+    result = ( vec.getX() * vec.getX() );
+    result = ( result + ( vec.getY() * vec.getY() ) );
+    result = ( result + ( vec.getZ() * vec.getZ() ) );
+    return result;
+}
+
+inline float length( const Vector3 & vec )
+{
+    return sqrtf( lengthSqr( vec ) );
+}
+
+inline const Vector3 normalize( const Vector3 & vec )
+{
+    float lenSqr, lenInv;
+    lenSqr = lengthSqr( vec );
+    lenInv = ( 1.0f / sqrtf( lenSqr ) );
+    return Vector3(
+        ( vec.getX() * lenInv ),
+        ( vec.getY() * lenInv ),
+        ( vec.getZ() * lenInv )
+    );
+}
+
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        ( ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() ) ),
+        ( ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() ) ),
+        ( ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() ) )
+    );
+}
+
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 )
+{
+    return Vector3(
+        ( select1 )? vec1.getX() : vec0.getX(),
+        ( select1 )? vec1.getY() : vec0.getY(),
+        ( select1 )? vec1.getZ() : vec0.getZ()
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector3 & vec )
+{
+    printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() );
+}
+
+inline void print( const Vector3 & vec, const char * name )
+{
+    printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() );
+}
+
+#endif
+
+inline Vector4::Vector4( const Vector4 & vec )
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+}
+
+inline Vector4::Vector4( float _x, float _y, float _z, float _w )
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Vector4::Vector4( const Vector3 & xyz, float _w )
+{
+    this->setXYZ( xyz );
+    this->setW( _w );
+}
+
+inline Vector4::Vector4( const Vector3 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = 0.0f;
+}
+
+inline Vector4::Vector4( const Point3 & pnt )
+{
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+    mW = 1.0f;
+}
+
+inline Vector4::Vector4( const Quat & quat )
+{
+    mX = quat.getX();
+    mY = quat.getY();
+    mZ = quat.getZ();
+    mW = quat.getW();
+}
+
+inline Vector4::Vector4( float scalar )
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline const Vector4 Vector4::xAxis( )
+{
+    return Vector4( 1.0f, 0.0f, 0.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{
+    return Vector4( 0.0f, 1.0f, 0.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{
+    return Vector4( 0.0f, 0.0f, 1.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{
+    return Vector4( 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
+{
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = dot( unitVec0, unitVec1 );
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        angle = acosf( cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) );
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline Vector4 & Vector4::operator =( const Vector4 & vec )
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{
+    return Vector3( mX, mY, mZ );
+}
+
+inline Vector4 & Vector4::setX( float _x )
+{
+    mX = _x;
+    return *this;
+}
+
+inline float Vector4::getX( ) const
+{
+    return mX;
+}
+
+inline Vector4 & Vector4::setY( float _y )
+{
+    mY = _y;
+    return *this;
+}
+
+inline float Vector4::getY( ) const
+{
+    return mY;
+}
+
+inline Vector4 & Vector4::setZ( float _z )
+{
+    mZ = _z;
+    return *this;
+}
+
+inline float Vector4::getZ( ) const
+{
+    return mZ;
+}
+
+inline Vector4 & Vector4::setW( float _w )
+{
+    mW = _w;
+    return *this;
+}
+
+inline float Vector4::getW( ) const
+{
+    return mW;
+}
+
+inline Vector4 & Vector4::setElem( int idx, float value )
+{
+    *(&mX + idx) = value;
+    return *this;
+}
+
+inline float Vector4::getElem( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline float & Vector4::operator []( int idx )
+{
+    return *(&mX + idx);
+}
+
+inline float Vector4::operator []( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
+{
+    return Vector4(
+        ( mX + vec.mX ),
+        ( mY + vec.mY ),
+        ( mZ + vec.mZ ),
+        ( mW + vec.mW )
+    );
+}
+
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
+{
+    return Vector4(
+        ( mX - vec.mX ),
+        ( mY - vec.mY ),
+        ( mZ - vec.mZ ),
+        ( mW - vec.mW )
+    );
+}
+
+inline const Vector4 Vector4::operator *( float scalar ) const
+{
+    return Vector4(
+        ( mX * scalar ),
+        ( mY * scalar ),
+        ( mZ * scalar ),
+        ( mW * scalar )
+    );
+}
+
+inline Vector4 & Vector4::operator +=( const Vector4 & vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator -=( const Vector4 & vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator /( float scalar ) const
+{
+    return Vector4(
+        ( mX / scalar ),
+        ( mY / scalar ),
+        ( mZ / scalar ),
+        ( mW / scalar )
+    );
+}
+
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator -( ) const
+{
+    return Vector4(
+        -mX,
+        -mY,
+        -mZ,
+        -mW
+    );
+}
+
+inline const Vector4 operator *( float scalar, const Vector4 & vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    return Vector4(
+        ( vec0.getX() * vec1.getX() ),
+        ( vec0.getY() * vec1.getY() ),
+        ( vec0.getZ() * vec1.getZ() ),
+        ( vec0.getW() * vec1.getW() )
+    );
+}
+
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    return Vector4(
+        ( vec0.getX() / vec1.getX() ),
+        ( vec0.getY() / vec1.getY() ),
+        ( vec0.getZ() / vec1.getZ() ),
+        ( vec0.getW() / vec1.getW() )
+    );
+}
+
+inline const Vector4 recipPerElem( const Vector4 & vec )
+{
+    return Vector4(
+        ( 1.0f / vec.getX() ),
+        ( 1.0f / vec.getY() ),
+        ( 1.0f / vec.getZ() ),
+        ( 1.0f / vec.getW() )
+    );
+}
+
+inline const Vector4 sqrtPerElem( const Vector4 & vec )
+{
+    return Vector4(
+        sqrtf( vec.getX() ),
+        sqrtf( vec.getY() ),
+        sqrtf( vec.getZ() ),
+        sqrtf( vec.getW() )
+    );
+}
+
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )
+{
+    return Vector4(
+        ( 1.0f / sqrtf( vec.getX() ) ),
+        ( 1.0f / sqrtf( vec.getY() ) ),
+        ( 1.0f / sqrtf( vec.getZ() ) ),
+        ( 1.0f / sqrtf( vec.getW() ) )
+    );
+}
+
+inline const Vector4 absPerElem( const Vector4 & vec )
+{
+    return Vector4(
+        fabsf( vec.getX() ),
+        fabsf( vec.getY() ),
+        fabsf( vec.getZ() ),
+        fabsf( vec.getW() )
+    );
+}
+
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    return Vector4(
+        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
+        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
+        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ),
+        ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() )
+    );
+}
+
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    return Vector4(
+        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
+        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
+        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(),
+        (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW()
+    );
+}
+
+inline float maxElem( const Vector4 & vec )
+{
+    float result;
+    result = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
+    result = (vec.getZ() > result)? vec.getZ() : result;
+    result = (vec.getW() > result)? vec.getW() : result;
+    return result;
+}
+
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    return Vector4(
+        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
+        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
+        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(),
+        (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW()
+    );
+}
+
+inline float minElem( const Vector4 & vec )
+{
+    float result;
+    result = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
+    result = (vec.getZ() < result)? vec.getZ() : result;
+    result = (vec.getW() < result)? vec.getW() : result;
+    return result;
+}
+
+inline float sum( const Vector4 & vec )
+{
+    float result;
+    result = ( vec.getX() + vec.getY() );
+    result = ( result + vec.getZ() );
+    result = ( result + vec.getW() );
+    return result;
+}
+
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    float result;
+    result = ( vec0.getX() * vec1.getX() );
+    result = ( result + ( vec0.getY() * vec1.getY() ) );
+    result = ( result + ( vec0.getZ() * vec1.getZ() ) );
+    result = ( result + ( vec0.getW() * vec1.getW() ) );
+    return result;
+}
+
+// Squared length: the sum of the squares of the x, y, z, and w elements.
+inline float lengthSqr( const Vector4 & vec )
+{
+    float acc = vec.getX() * vec.getX();
+    acc += vec.getY() * vec.getY();
+    acc += vec.getZ() * vec.getZ();
+    acc += vec.getW() * vec.getW();
+    return acc;
+}
+
+// Length of vec: the square root of lengthSqr( vec ).
+inline float length( const Vector4 & vec ) {
+    return sqrtf( lengthSqr( vec ) );
+}
+
+// Scale every element of vec by the reciprocal of its length.
+// There is no guard here against a zero-length input.
+inline const Vector4 normalize( const Vector4 & vec )
+{
+    const float invLen = 1.0f / sqrtf( lengthSqr( vec ) );
+    return Vector4(
+        vec.getX() * invLen,
+        vec.getY() * invLen,
+        vec.getZ() * invLen,
+        vec.getW() * invLen
+    );
+}
+
+// Return the elements of vec1 when select1 is true, otherwise those of vec0.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 )
+{
+    const Vector4 & chosen = select1 ? vec1 : vec0;
+    return Vector4(
+        chosen.getX(), chosen.getY(),
+        chosen.getZ(), chosen.getW()
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Write the four elements of vec to stdout as "( x y z w )" plus a newline.
+inline void print( const Vector4 & vec ) {
+    printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
+}
+
+// Write name, a colon, and the four elements of vec to stdout, plus a newline.
+inline void print( const Vector4 & vec, const char * name ) {
+    printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
+}
+
+#endif
+
+// Copy constructor: duplicates the x, y, and z elements of pnt.
+inline Point3::Point3( const Point3 & pnt )
+    : mX( pnt.mX ),
+      mY( pnt.mY ),
+      mZ( pnt.mZ )
+{ }
+
+// Build a point from separate x, y, and z values.
+inline Point3::Point3( float _x, float _y, float _z )
+    : mX( _x ),
+      mY( _y ),
+      mZ( _z )
+{ }
+
+// Build a point whose x, y, and z are read from vec's getters.
+inline Point3::Point3( const Vector3 & vec )
+    : mX( vec.getX() ),
+      mY( vec.getY() ),
+      mZ( vec.getZ() )
+{ }
+
+// Build a point with x, y, and z all set to scalar.
+inline Point3::Point3( float scalar )
+    : mX( scalar ),
+      mY( scalar ),
+      mZ( scalar )
+{ }
+
+// Linear interpolation: pnt0 plus t times the offset to pnt1; t is not clamped.
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 ) {
+    return pnt0 + ( pnt1 - pnt0 ) * t;
+}
+
+// Assignment: overwrite x, y, and z with pnt's elements and return *this.
+inline Point3 & Point3::operator =( const Point3 & pnt ) {
+    this->mX = pnt.mX;
+    this->mY = pnt.mY;
+    this->mZ = pnt.mZ;
+    return *this;
+}
+
+// Store _x in the x element and return *this.
+inline Point3 & Point3::setX( float _x ) {
+    this->mX = _x;
+    return *this;
+}
+
+// Read the x element.
+inline float Point3::getX( ) const {
+    return this->mX;
+}
+
+// Store _y in the y element and return *this.
+inline Point3 & Point3::setY( float _y ) {
+    this->mY = _y;
+    return *this;
+}
+
+// Read the y element.
+inline float Point3::getY( ) const {
+    return this->mY;
+}
+
+// Store _z in the z element and return *this.
+inline Point3 & Point3::setZ( float _z ) {
+    this->mZ = _z;
+    return *this;
+}
+
+// Read the z element.
+inline float Point3::getZ( ) const {
+    return this->mZ;
+}
+
+// Write value to the float located idx positions past mX; idx is not range-checked.
+inline Point3 & Point3::setElem( int idx, float value ) {
+    ( &mX )[ idx ] = value;
+    return *this;
+}
+
+// Read the float located idx positions past mX; idx is not range-checked.
+inline float Point3::getElem( int idx ) const {
+    return ( &mX )[ idx ];
+}
+
+// Mutable access to the float located idx positions past mX; idx is not range-checked.
+inline float & Point3::operator []( int idx ) {
+    return ( &mX )[ idx ];
+}
+
+// Read-only access, by value, to the float idx positions past mX; idx is not range-checked.
+inline float Point3::operator []( int idx ) const {
+    return ( &mX )[ idx ];
+}
+
+// Point minus point: the vector of per-element differences ( this - pnt ).
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+    const float dx = mX - pnt.mX;
+    const float dy = mY - pnt.mY;
+    const float dz = mZ - pnt.mZ;
+    return Vector3( dx, dy, dz );
+}
+
+// Point plus vector: a new point offset from this one by vec.
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+    const float px = mX + vec.getX();
+    const float py = mY + vec.getY();
+    const float pz = mZ + vec.getZ();
+    return Point3( px, py, pz );
+}
+
+// Point minus vector: a new point offset from this one by the negation of vec.
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+    const float px = mX - vec.getX();
+    const float py = mY - vec.getY();
+    const float pz = mZ - vec.getZ();
+    return Point3( px, py, pz );
+}
+
+// Add vec to this point in place and return *this.
+inline Point3 & Point3::operator +=( const Vector3 & vec ) {
+    const Point3 moved = *this + vec;
+    return ( *this = moved );
+}
+
+// Subtract vec from this point in place and return *this.
+inline Point3 & Point3::operator -=( const Vector3 & vec ) {
+    const Point3 moved = *this - vec;
+    return ( *this = moved );
+}
+
+// Per-element product of two points.
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float px = pnt0.getX() * pnt1.getX();
+    const float py = pnt0.getY() * pnt1.getY();
+    const float pz = pnt0.getZ() * pnt1.getZ();
+    return Point3( px, py, pz );
+}
+
+// Per-element quotient: each element of pnt0 divided by the matching element of pnt1.
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float qx = pnt0.getX() / pnt1.getX();
+    const float qy = pnt0.getY() / pnt1.getY();
+    const float qz = pnt0.getZ() / pnt1.getZ();
+    return Point3( qx, qy, qz );
+}
+
+// Per-element reciprocal: 1.0f divided by each element of pnt.
+inline const Point3 recipPerElem( const Point3 & pnt )
+{
+    const float rx = 1.0f / pnt.getX();
+    const float ry = 1.0f / pnt.getY();
+    const float rz = 1.0f / pnt.getZ();
+    return Point3( rx, ry, rz );
+}
+
+// Per-element square root, computed with sqrtf.
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+    const float sx = sqrtf( pnt.getX() );
+    const float sy = sqrtf( pnt.getY() );
+    const float sz = sqrtf( pnt.getZ() );
+    return Point3( sx, sy, sz );
+}
+
+// Per-element reciprocal square root: 1.0f / sqrtf( element ).
+inline const Point3 rsqrtPerElem( const Point3 & pnt )
+{
+    const float rx = 1.0f / sqrtf( pnt.getX() );
+    const float ry = 1.0f / sqrtf( pnt.getY() );
+    const float rz = 1.0f / sqrtf( pnt.getZ() );
+    return Point3( rx, ry, rz );
+}
+
+// Per-element absolute value, computed with fabsf.
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+    const float ax = fabsf( pnt.getX() );
+    const float ay = fabsf( pnt.getY() );
+    const float az = fabsf( pnt.getZ() );
+    return Point3( ax, ay, az );
+}
+
+// Per element: the magnitude of pnt0's element, negated when pnt1's element is < 0.0f.
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float sx = ( pnt1.getX() < 0.0f ) ? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() );
+    const float sy = ( pnt1.getY() < 0.0f ) ? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() );
+    const float sz = ( pnt1.getZ() < 0.0f ) ? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() );
+    return Point3( sx, sy, sz );
+}
+
+// Per-element maximum: takes pnt0's element when it compares greater, otherwise pnt1's.
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float maxX = ( pnt1.getX() < pnt0.getX() ) ? pnt0.getX() : pnt1.getX();
+    const float maxY = ( pnt1.getY() < pnt0.getY() ) ? pnt0.getY() : pnt1.getY();
+    const float maxZ = ( pnt1.getZ() < pnt0.getZ() ) ? pnt0.getZ() : pnt1.getZ();
+    return Point3( maxX, maxY, maxZ );
+}
+
+// Largest of the x, y, and z elements of pnt.
+inline float maxElem( const Point3 & pnt )
+{
+    float largest = ( pnt.getX() > pnt.getY() ) ? pnt.getX() : pnt.getY();
+    if ( pnt.getZ() > largest ) { largest = pnt.getZ(); }
+    return largest;
+}
+
+// Per-element minimum: takes pnt0's element when it compares less, otherwise pnt1's.
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float minX = ( pnt1.getX() > pnt0.getX() ) ? pnt0.getX() : pnt1.getX();
+    const float minY = ( pnt1.getY() > pnt0.getY() ) ? pnt0.getY() : pnt1.getY();
+    const float minZ = ( pnt1.getZ() > pnt0.getZ() ) ? pnt0.getZ() : pnt1.getZ();
+    return Point3( minX, minY, minZ );
+}
+
+// Smallest of the x, y, and z elements of pnt.
+inline float minElem( const Point3 & pnt )
+{
+    float smallest = ( pnt.getX() < pnt.getY() ) ? pnt.getX() : pnt.getY();
+    if ( pnt.getZ() < smallest ) { smallest = pnt.getZ(); }
+    return smallest;
+}
+
+// Add the x, y, and z elements of pnt, accumulating in that order.
+inline float sum( const Point3 & pnt )
+{
+    float total = pnt.getX() + pnt.getY();
+    total += pnt.getZ();
+    return total;
+}
+
+// Uniform scale: multiply each element of pnt by scaleVal.
+inline const Point3 scale( const Point3 & pnt, float scaleVal ) {
+    return Point3( pnt.getX() * scaleVal, pnt.getY() * scaleVal, pnt.getZ() * scaleVal );
+}
+
+// Non-uniform scale: multiply each element of pnt by the matching element of scaleVec.
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ) {
+    return Point3( pnt.getX() * scaleVec.getX(), pnt.getY() * scaleVec.getY(), pnt.getZ() * scaleVec.getZ() );
+}
+
+// Sum of the products of pnt's x, y, z with the matching elements of unitVec.
+inline float projection( const Point3 & pnt, const Vector3 & unitVec )
+{
+    float acc = pnt.getX() * unitVec.getX();
+    acc += pnt.getY() * unitVec.getY();
+    acc += pnt.getZ() * unitVec.getZ();
+    return acc;
+}
+
+// Squared length of the vector built from pnt's elements.
+inline float distSqrFromOrigin( const Point3 & pnt ) {
+    return lengthSqr( Vector3( pnt ) );
+}
+
+// Length of the vector built from pnt's elements.
+inline float distFromOrigin( const Point3 & pnt ) {
+    return length( Vector3( pnt ) );
+}
+
+// Squared length of the vector from pnt0 to pnt1.
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 ) {
+    return lengthSqr( pnt1 - pnt0 );
+}
+
+// Length of the vector from pnt0 to pnt1.
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 ) {
+    return length( pnt1 - pnt0 );
+}
+
+// Return the elements of pnt1 when select1 is true, otherwise those of pnt0.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 )
+{
+    const Point3 & chosen = select1 ? pnt1 : pnt0;
+    return Point3(
+        chosen.getX(), chosen.getY(), chosen.getZ()
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Write the three elements of pnt to stdout as "( x y z )" plus a newline.
+inline void print( const Point3 & pnt ) {
+    printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() );
+}
+
+// Write name, a colon, and the three elements of pnt to stdout, plus a newline.
+inline void print( const Point3 & pnt, const char * name ) {
+    printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vectormath_aos.h
index a41d22ad2..7913c11ea 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vectormath_aos.h
@@ -1,1809 +1,1809 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_CPP_SCALAR_H
-#define _VECTORMATH_AOS_CPP_SCALAR_H
-
-#include <math.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-namespace Vectormath {
-
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A 3-D vector in array-of-structures format
-//
-class Vector3
-{
-    float mX;
-    float mY;
-    float mZ;
-#ifndef __GNUC__
-    float d;
-#endif
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Copy a 3-D vector
-    // 
-    inline Vector3( const Vector3 & vec );
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( float x, float y, float z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( const Point3 & pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( float scalar );
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( const Vector3 & vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( float x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( float y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( float z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( const Vector3 & vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( const Point3 & pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( float scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( const Vector3 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( float scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( float scalar, const Vector3 & vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( const Vector3 & vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( const Vector3 & vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( const Vector3 & vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( const Vector3 & vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline float maxElem( const Vector3 & vec );
-
-// Minimum element of a 3-D vector
-// 
-inline float minElem( const Vector3 & vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline float sum( const Vector3 & vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline float lengthSqr( const Vector3 & vec );
-
-// Compute the length of a 3-D vector
-// 
-inline float length( const Vector3 & vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( const Vector3 & vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// 
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( const Vector3 & vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// 
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec, const char * name );
-
-#endif
-
-// A 4-D vector in array-of-structures format
-//
-class Vector4
-{
-    float mX;
-    float mY;
-    float mZ;
-    float mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Copy a 4-D vector
-    // 
-    inline Vector4( const Vector4 & vec );
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( float x, float y, float z, float w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( const Vector3 & xyz, float w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( const Vector3 & vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( const Point3 & pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( const Quat & quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( float scalar );
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( const Vector4 & vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( float x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( float y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( float z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( float w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( const Vector4 & vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( const Vector4 & vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( float scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( const Vector4 & vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( const Vector4 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( float scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( float scalar, const Vector4 & vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( const Vector4 & vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( const Vector4 & vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( const Vector4 & vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( const Vector4 & vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline float maxElem( const Vector4 & vec );
-
-// Minimum element of a 4-D vector
-// 
-inline float minElem( const Vector4 & vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline float sum( const Vector4 & vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline float dot( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline float lengthSqr( const Vector4 & vec );
-
-// Compute the length of a 4-D vector
-// 
-inline float length( const Vector4 & vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( const Vector4 & vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// 
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec, const char * name );
-
-#endif
-
-// A 3-D point in array-of-structures format
-//
-class Point3
-{
-    float mX;
-    float mY;
-    float mZ;
-#ifndef __GNUC__
-    float d;
-#endif
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Copy a 3-D point
-    // 
-    inline Point3( const Point3 & pnt );
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( float x, float y, float z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( const Vector3 & vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( float scalar );
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( const Point3 & pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( float x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( float y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( float z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( const Point3 & pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( const Vector3 & vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( const Vector3 & vec );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( const Point3 & pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( const Point3 & pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( const Point3 & pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( const Point3 & pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline float maxElem( const Point3 & pnt );
-
-// Minimum element of a 3-D point
-// 
-inline float minElem( const Point3 & pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline float sum( const Point3 & pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, float scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline float projection( const Point3 & pnt, const Vector3 & unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distSqrFromOrigin( const Point3 & pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distFromOrigin( const Point3 & pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline float dist( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
-
-// Conditionally select between two 3-D points
-// 
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt, const char * name );
-
-#endif
-
-// A quaternion in array-of-structures format
-//
-class Quat
-{
-    float mX;
-    float mY;
-    float mZ;
-    float mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Copy a quaternion
-    // 
-    inline Quat( const Quat & quat );
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( float x, float y, float z, float w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( const Vector3 & xyz, float w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( const Vector4 & vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( float scalar );
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( const Quat & quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( float x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( float y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( float z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( float w );
-
-    // Get the x element of a quaternion
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( const Quat & quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( const Quat & quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( const Quat & quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( float scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( const Quat & quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( float scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( float radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( float radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( float radians );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( float scalar, const Quat & quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( const Quat & quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
-
-// Compute the dot product of two quaternions
-// 
-inline float dot( const Quat & quat0, const Quat & quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline float norm( const Quat & quat );
-
-// Compute the length of a quaternion
-// 
-inline float length( const Quat & quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( const Quat & quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
-
-// Conditionally select between two quaternions
-// 
-inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat, const char * name );
-
-#endif
-
-// A 3x3 matrix in array-of-structures format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( const Quat & unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( float scalar );
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( const Vector3 & col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, const Vector3 & vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( float scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( float radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( float radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( float radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( const Vector3 & scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline float determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A 4x4 matrix in array-of-structures format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( float scalar );
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( const Vector4 & col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( const Vector4 & col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( const Vector4 & col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( const Vector4 & col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, const Vector4 & vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, float val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( float scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( const Vector4 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( float radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( float radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( float radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( const Quat & unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( const Vector3 & scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( const Vector3 & translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline float determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A 3x4 transformation matrix in array-of-structures format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( float scalar );
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( const Vector3 & col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( const Vector3 & col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( const Vector3 & scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( const Vector3 & translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_SCALAR_H
+#define _VECTORMATH_AOS_CPP_SCALAR_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// Stores three float elements (mX, mY, mZ); see the compiler-specific layout notes inside the class.
+class Vector3
+{
+    float mX;
+    float mY;
+    float mZ;
+#ifndef __GNUC__
+    float d; // extra member for non-GNU compilers only; presumably padding to four floats -- TODO confirm
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // NOTE(review): the body is empty, so the elements are left unset; the ';' after '{ }' is redundant.
+    inline Vector3( ) { };
+
+    // Copy a 3-D vector
+    // 
+    inline Vector3( const Vector3 & vec );
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Declared explicit, so a Point3 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( const Point3 & pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Declared explicit, so a bare float is never converted to a Vector3 implicitly.
+    explicit inline Vector3( float scalar );
+
+    // Assign one 3-D vector to another
+    // Returns Vector3 & (presumably *this -- the definition is not shown here).
+    inline Vector3 & operator =( const Vector3 & vec );
+
+    // Set the x element of a 3-D vector
+    // Returns Vector3 & (presumably *this, allowing chained setters -- confirm against the definition).
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // Same return convention as setX.
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // Same return convention as setX.
+    inline Vector3 & setZ( float z );
+
+    // Get the x element of a 3-D vector
+    // Const member; the element is returned by value.
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // Const member; the element is returned by value.
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // Const member; the element is returned by value.
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): presumably idx 0, 1, 2 select x, y, z; range checking is not visible here -- confirm.
+    inline Vector3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // Same index convention as setElem; the element is returned by value.
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload: returns float &, so the result can be assigned to.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // Const member taking a const reference; the sum is returned by value.
+    inline const Vector3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // Const member taking a const reference; the difference is returned by value.
+    inline const Vector3 operator -( const Vector3 & vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3, not Vector3.
+    inline const Point3 operator +( const Point3 & pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // Member form (vector * scalar); the product is returned by value.
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // NOTE(review): handling of a zero scalar is not visible in this declaration.
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Returns Vector3 & (presumably *this -- the definition is not shown here).
+    inline Vector3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // Same return convention as operator +=.
+    inline Vector3 & operator -=( const Vector3 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // Same return convention as operator +=.
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // Same return convention as operator +=.
+    inline Vector3 & operator /=( float scalar );
+
+    // Negate all elements of a 3-D vector
+    // Unary minus; const member returning the negated vector by value.
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // Static member: called as Vector3::xAxis( ), no instance required.
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // Static member: called as Vector3::yAxis( ), no instance required.
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // Static member: called as Vector3::zAxis( ), no instance required.
+    static inline const Vector3 zAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) // GNU compilers only: requests 16-byte alignment for the class
+#endif
+;
+
+// Multiply a 3-D vector by a scalar
+// 
+inline const Vector3 operator *( float scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+// 
+inline float maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+// 
+inline float minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline float sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline float lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+// 
+inline float length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+// 
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// 
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// 
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// Stores four float elements (mX, mY, mZ, mW); GNU compilers additionally apply aligned(16).
+class Vector4
+{
+    float mX;
+    float mY;
+    float mZ;
+    float mW;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE(review): the body is empty, so the elements are left unset; the ';' after '{ }' is redundant.
+    inline Vector4( ) { };
+
+    // Copy a 4-D vector
+    // 
+    inline Vector4( const Vector4 & vec );
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // Not explicit; takes the w element as a separate argument.
+    inline Vector4( const Vector3 & xyz, float w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Declared explicit, so a Vector3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Vector3 & vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Declared explicit, so a Point3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Point3 & pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Declared explicit, so a Quat is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Quat & quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Declared explicit, so a bare float is never converted to a Vector4 implicitly.
+    explicit inline Vector4( float scalar );
+
+    // Assign one 4-D vector to another
+    // Returns Vector4 & (presumably *this -- the definition is not shown here).
+    inline Vector4 & operator =( const Vector4 & vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // Returns Vector4 & (presumably *this -- confirm against the definition).
+    inline Vector4 & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // Const member; the three elements are returned by value as a Vector3.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // Returns Vector4 & (presumably *this, allowing chained setters -- confirm against the definition).
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // Same return convention as setX.
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // Same return convention as setX.
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // Same return convention as setX.
+    inline Vector4 & setW( float w );
+
+    // Get the x element of a 4-D vector
+    // Const member; the element is returned by value.
+    inline float getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // Const member; the element is returned by value.
+    inline float getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // Const member; the element is returned by value.
+    inline float getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // Const member; the element is returned by value.
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): presumably idx 0..3 select x, y, z, w; range checking is not visible here -- confirm.
+    inline Vector4 & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // Same index convention as setElem; the element is returned by value.
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload: returns float &, so the result can be assigned to.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // Const member taking a const reference; the sum is returned by value.
+    inline const Vector4 operator +( const Vector4 & vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // Const member taking a const reference; the difference is returned by value.
+    inline const Vector4 operator -( const Vector4 & vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // Member form (vector * scalar); the product is returned by value.
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // NOTE(review): handling of a zero scalar is not visible in this declaration.
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // Returns Vector4 & (presumably *this -- the definition is not shown here).
+    inline Vector4 & operator +=( const Vector4 & vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // Same return convention as operator +=.
+    inline Vector4 & operator -=( const Vector4 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // Same return convention as operator +=.
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // Same return convention as operator +=.
+    inline Vector4 & operator /=( float scalar );
+
+    // Negate all elements of a 4-D vector
+    // Unary minus; const member returning the negated vector by value.
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // Static member: called as Vector4::xAxis( ), no instance required.
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // Static member: called as Vector4::yAxis( ), no instance required.
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // Static member: called as Vector4::zAxis( ), no instance required.
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // Static member: called as Vector4::wAxis( ), no instance required.
+    static inline const Vector4 wAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) // GNU compilers only: requests 16-byte alignment for the class
+#endif
+;
+
+// Multiply a 4-D vector by a scalar
+// 
+inline const Vector4 operator *( float scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+// 
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+// 
+inline float maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+// 
+inline float minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline float sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline float lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+// 
+inline float length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+// 
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// 
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// Stores three float elements (mX, mY, mZ); see the compiler-specific layout notes inside the class.
+class Point3
+{
+    float mX;
+    float mY;
+    float mZ;
+#ifndef __GNUC__
+    float d; // extra member for non-GNU compilers only; presumably padding to four floats -- TODO confirm
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // NOTE(review): the body is empty, so the elements are left unset; the ';' after '{ }' is redundant.
+    inline Point3( ) { };
+
+    // Copy a 3-D point
+    // 
+    inline Point3( const Point3 & pnt );
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( float x, float y, float z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit, so a Vector3 is never converted to a Point3 implicitly.
+    explicit inline Point3( const Vector3 & vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit, so a bare float is never converted to a Point3 implicitly.
+    explicit inline Point3( float scalar );
+
+    // Assign one 3-D point to another
+    // Returns Point3 & (presumably *this -- the definition is not shown here).
+    inline Point3 & operator =( const Point3 & pnt );
+
+    // Set the x element of a 3-D point
+    // Returns Point3 & (presumably *this, allowing chained setters -- confirm against the definition).
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // Same return convention as setX.
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // Same return convention as setX.
+    inline Point3 & setZ( float z );
+
+    // Get the x element of a 3-D point
+    // Const member; the element is returned by value.
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D point
+    // Const member; the element is returned by value.
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D point
+    // Const member; the element is returned by value.
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): presumably idx 0, 1, 2 select x, y, z; range checking is not visible here -- confirm.
+    inline Point3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // Same index convention as setElem; the element is returned by value.
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload: returns float &, so the result can be assigned to.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result type is Vector3 (point - point), not Point3.
+    inline const Vector3 operator -( const Point3 & pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // The result type is Point3 (point + vector).
+    inline const Point3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result type is Point3 (point - vector).
+    inline const Point3 operator -( const Vector3 & vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Returns Point3 & (presumably *this -- the definition is not shown here).
+    inline Point3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // Same return convention as operator +=.
+    inline Point3 & operator -=( const Vector3 & vec );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) // GNU compilers only: requests 16-byte alignment for the class
+#endif
+;
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+// 
+inline float maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+// 
+inline float minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline float sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 & pnt, float scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline float projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+// 
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+// Stores four float elements (mX, mY, mZ, mW); GNU compilers additionally apply aligned(16).
+class Quat
+{
+    float mX;
+    float mY;
+    float mZ;
+    float mW;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE(review): the body is empty, so the elements are left unset; the ';' after '{ }' is redundant.
+    inline Quat( ) { };
+
+    // Copy a quaternion
+    // 
+    inline Quat( const Quat & quat );
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // Not explicit; takes the w element as a separate argument.
+    inline Quat( const Vector3 & xyz, float w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Declared explicit, so a Vector4 is never converted to a Quat implicitly.
+    explicit inline Quat( const Vector4 & vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Declared explicit, so a Matrix3 is never converted to a Quat implicitly.
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Declared explicit, so a bare float is never converted to a Quat implicitly.
+    explicit inline Quat( float scalar );
+
+    // Assign one quaternion to another
+    // Returns Quat & (presumably *this -- the definition is not shown here).
+    inline Quat & operator =( const Quat & quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // Returns Quat & (presumably *this -- confirm against the definition).
+    inline Quat & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // Const member; the three elements are returned by value as a Vector3.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // Returns Quat & (presumably *this, allowing chained setters -- confirm against the definition).
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // Same return convention as setX.
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // Same return convention as setX.
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // Same return convention as setX.
+    inline Quat & setW( float w );
+
+    // Get the x element of a quaternion
+    // Const member; the element is returned by value.
+    inline float getX( ) const;
+
+    // Get the y element of a quaternion
+    // Const member; the element is returned by value.
+    inline float getY( ) const;
+
+    // Get the z element of a quaternion
+    // Const member; the element is returned by value.
+    inline float getZ( ) const;
+
+    // Get the w element of a quaternion
+    // Const member; the element is returned by value.
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // NOTE(review): presumably idx 0..3 select x, y, z, w; range checking is not visible here -- confirm.
+    inline Quat & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // Same index convention as setElem; the element is returned by value.
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload: returns float &, so the result can be assigned to.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two quaternions
+    // Const member taking a const reference; the sum is returned by value.
+    inline const Quat operator +( const Quat & quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // Const member taking a const reference; the difference is returned by value.
+    inline const Quat operator -( const Quat & quat ) const;
+
+    // Multiply two quaternions
+    // Quat overload of operator *; a separate overload below takes a float scalar.
+    inline const Quat operator *( const Quat & quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // float overload of operator * (member form, quaternion * scalar).
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // NOTE(review): handling of a zero scalar is not visible in this declaration.
+    inline const Quat operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // Returns Quat & (presumably *this -- the definition is not shown here).
+    inline Quat & operator +=( const Quat & quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // Same return convention as operator +=.
+    inline Quat & operator -=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // Quat overload of operator *=; a separate overload below takes a float scalar.
+    inline Quat & operator *=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // float overload of operator *=.
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // Same return convention as operator +=.
+    inline Quat & operator /=( float scalar );
+
+    // Negate all elements of a quaternion
+    // Unary minus; const member returning the negated quaternion by value.
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // Static member: called as Quat::identity( ), no instance required.
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // Static overload taking two vectors; the (radians, unitVec) overload below takes an angle and an axis.
+    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // Static overload taking the angle in radians followed by the axis vector.
+    static inline const Quat rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // Static member; the angle is given in radians.
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // Static member; the angle is given in radians.
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // Static member; the angle is given in radians.
+    static inline const Quat rotationZ( float radians );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) // GNU compilers only: requests 16-byte alignment for the class
+#endif
+;
+
+// Multiply a quaternion by a scalar
+// 
+inline const Quat operator *( float scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+// 
+inline float dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+// 
+inline float norm( const Quat & quat );
+
+// Compute the length of a quaternion
+// 
+inline float length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+// 
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+//
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // 
+    explicit inline Matrix3( const Quat & unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // 
+    explicit inline Matrix3( float scalar );
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol2( const Vector3 & col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // 
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // 
+    inline float getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix3 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // 
+    static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// (non-member overload taking the scalar as the left operand; returns a new matrix by value)
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Takes mat by const reference and returns the scaled result by value.
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Takes mat by const reference and returns the scaled result by value.
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// (distinct from the matrix product, which Matrix3 declares as a member operator *)
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// Returns the transposed copy by value; mat is taken by const reference.
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// Returned as a plain float.
+inline float determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE(review): presumably yields mat1 when select1 is true and mat0 otherwise - confirm against the definition.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// Holds four Vector4 columns (mCol0..mCol3); only the default constructor has its body inside this class.
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so nothing is assigned to the columns here)
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Declared explicit: a Transform3 is not implicitly converted to a Matrix4.
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // NOTE(review): the bottom-row values are chosen by the definition, which is not shown here - confirm.
+    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // NOTE(review): the bottom-row values are likewise chosen by the definition - confirm.
+    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // Declared explicit: a float is not implicitly converted to a Matrix4.
+    explicit inline Matrix4( float scalar );
+
+    // Assign one 4x4 matrix to another
+    // Returns Matrix4 & (presumably *this, to allow chaining - confirm in the definition).
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // Returned by value as a Matrix3.
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // Returned by value as a Vector3.
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol0( const Vector4 & col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol1( const Vector4 & col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol2( const Vector4 & col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol3( const Vector4 & col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // NOTE(review): any range checking of col happens in the definition - presumably 0..3 is expected; confirm.
+    inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // NOTE(review): any range checking of row happens in the definition - presumably 0..3 is expected; confirm.
+    inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // Returned by value.
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // Returned by value.
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a non-const Vector4 reference, so the result can be assigned to.
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value rather than by reference.
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Index order is ( col, row ).
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Index order is ( col, row ).
+    inline float getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    inline const Vector4 operator *( const Vector4 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // Returns a Vector4. NOTE(review): how the absent w component is treated is decided by the definition - confirm.
+    inline const Vector4 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // Returns a Vector4. NOTE(review): how the absent w component is treated is decided by the definition - confirm.
+    inline const Vector4 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // The result is a full Matrix4.
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // Static member; the new matrix is returned by value.
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, z angles in that order; composition order is set by the definition - confirm.
+    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix4 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix4 rotation( const Quat & unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( const Vector3 & translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    // NOTE(review): handedness and axis conventions are set by the definition, not visible here - confirm.
+    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+    // Construct a perspective projection matrix
+    // NOTE(review): fovyRadians is presumably the vertical field of view; depth-range convention is set by the definition - confirm.
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // NOTE(review): depth-range and handedness conventions are set by the definition - confirm.
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // NOTE(review): depth-range and handedness conventions are set by the definition - confirm.
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// (non-member overload taking the scalar as the left operand; returns a new matrix by value)
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Takes mat by const reference and returns the scaled result by value; scaleVec is a 3-D vector.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Takes mat by const reference and returns the scaled result by value; scaleVec is a 3-D vector.
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// (distinct from the matrix product, which Matrix4 declares as a member operator *)
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// Returns the transposed copy by value; mat is taken by const reference.
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// General-purpose variant; affineInverse and orthoInverse below are the restricted alternatives.
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// The restriction referred to is the one stated above: mat is expected to be affine.
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// The restrictions referred to are the ones stated above: affine, with an orthogonal upper-left 3x3 submatrix.
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// Returned as a plain float.
+inline float determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE(review): presumably yields mat1 when select1 is true and mat0 otherwise - confirm against the definition.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+// Holds four Vector3 columns (mCol0..mCol3), i.e. three rows by four columns; only the default constructor has its body inside this class.
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so nothing is assigned to the columns here)
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // (the Matrix3 argument is named tfrm in this declaration)
+    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // Declared explicit: a float is not implicitly converted to a Transform3.
+    explicit inline Transform3( float scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // Returns Transform3 & (presumably *this, to allow chaining - confirm in the definition).
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // Returned by value as a Matrix3.
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // Returned by value as a Vector3.
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( const Vector3 & col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( const Vector3 & col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // NOTE(review): any range checking of col happens in the definition - presumably 0..3 is expected; confirm.
+    inline Transform3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // Takes a Vector4 because a row spans all four columns. NOTE(review): presumably row is expected in 0..2 - confirm.
+    inline Transform3 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // Returned by value.
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // Returned by value as a Vector4 because a row spans all four columns.
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a non-const Vector3 reference, so the result can be assigned to.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value rather than by reference.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // Index order is ( col, row ).
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // Index order is ( col, row ).
+    inline float getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // Returns a Vector3. NOTE(review): presumably the translation column is not applied to vectors - confirm in the definition.
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // Returns a Point3. NOTE(review): presumably the translation column is applied to points - confirm in the definition.
+    inline const Point3 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // Static member; the new transform is returned by value.
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, z angles in that order; composition order is set by the definition - confirm.
+    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( const Vector3 & scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Takes tfrm by const reference and returns the scaled result by value.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Takes tfrm by const reference and returns the scaled result by value.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// (distinct from the transform product, which Transform3 declares as a member operator *)
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// General-purpose variant; orthoInverse below is the restricted alternative.
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// The restriction referred to is the one stated above: an orthogonal upper-left 3x3 submatrix.
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE(review): presumably yields tfrm1 when select1 is true and tfrm0 otherwise - confirm against the definition.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos.h
index f738e880f..d6b4cb0ba 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos.h
@@ -1,1833 +1,1833 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_C_H
-#define _VECTORMATH_MAT_AOS_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
-#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
-#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
-#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
-#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( &result->col0, &mat->col0 );
-    vmathV3Copy( &result->col1, &mat->col1 );
-    vmathV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000);
-    vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0);
-    xyzw_2 = spu_add( unitQuat->vec128, unitQuat->vec128 );
-    wwww = spu_shuffle( unitQuat->vec128, unitQuat->vec128, shuffle_wwww );
-    yzxw = spu_shuffle( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_SHUF_YZXW );
-    zxyw = spu_shuffle( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_SHUF_ZXYW );
-    yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW );
-    zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW );
-    tmp0 = spu_mul( yzxw_2, wwww );
-    tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) );
-    tmp2 = spu_mul( yzxw, xyzw_2 );
-    tmp0 = spu_madd( zxyw, xyzw_2, tmp0 );
-    tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 );
-    tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 );
-    tmp3 = spu_sel( tmp0, tmp1, select_x );
-    tmp4 = spu_sel( tmp1, tmp2, select_x );
-    tmp5 = spu_sel( tmp2, tmp0, select_x );
-    result->col0.vec128 = spu_sel( tmp3, tmp2, select_z );
-    result->col1.vec128 = spu_sel( tmp4, tmp0, select_z );
-    result->col2.vec128 = spu_sel( tmp5, tmp1, select_z );
-}
-
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, mat, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
-{
-    vmathV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
-{
-    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, res0, res1, res2;
-    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
-    res0 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_XAYB );
-    res1 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
-    res2 = spu_shuffle( tmp1, mat->col1.vec128, _VECTORMATH_SHUF_XCY0 );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    tmp2 = _vmathVfCross( mat->col0.vec128, mat->col1.vec128 );
-    tmp0 = _vmathVfCross( mat->col1.vec128, mat->col2.vec128 );
-    tmp1 = _vmathVfCross( mat->col2.vec128, mat->col0.vec128 );
-    dot = _vmathVfDot3( tmp2, mat->col2.vec128 );
-    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
-    invdet = recipf4( dot );
-    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
-    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
-    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
-    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
-    inv0 = spu_mul( inv0, invdet );
-    inv1 = spu_mul( inv1, invdet );
-    inv2 = spu_mul( inv2, invdet );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-}
-
-static inline float vmathM3Determinant( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Neg( &result->col0, &mat->col0 );
-    vmathV3Neg( &result->col1, &mat->col1 );
-    vmathV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    res = spu_mul( mat->col0.vec128, xxxx );
-    res = spu_madd( mat->col1.vec128, yyyy, res );
-    res = spu_madd( mat->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    VmathMatrix3 tmpResult;
-    vmathM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM3Copy( result, &tmpResult );
-}
-
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    vmathV3MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV3MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ->vec128;
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    result->col0.vec128 = spu_mul( Z0, Y0 );
-    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
-    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
-}
-
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    axis = unitVec->vec128;
-    sincosf4( spu_splats( radians ), &s, &c );
-    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
-    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
-    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    axisS = spu_mul( axis, s );
-    negAxisS = negatef4( axisS );
-    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
-    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
-    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
-    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
-    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
-    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
-    result->col0.vec128 = spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 );
-    result->col1.vec128 = spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 );
-    result->col2.vec128 = spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 );
-}
-
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vmathM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    result->col0.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x00f0) );
-}
-
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-}
-
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
-{
-    vmathV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathM3GetRow( &tmpV3_0, mat, 0 );
-    vmathV3Print( &tmpV3_0 );
-    vmathM3GetRow( &tmpV3_1, mat, 1 );
-    vmathV3Print( &tmpV3_1 );
-    vmathM3GetRow( &tmpV3_2, mat, 2 );
-    vmathV3Print( &tmpV3_2 );
-}
-
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM3Print( mat );
-}
-
-#endif
-
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( &result->col0, &mat->col0 );
-    vmathV4Copy( &result->col1, &mat->col1 );
-    vmathV4Copy( &result->col2, &mat->col2 );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
-{
-    vmathV4MakeFromScalar( &result->col0, scalar );
-    vmathV4MakeFromScalar( &result->col1, scalar );
-    vmathV4MakeFromScalar( &result->col2, scalar );
-    vmathV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, 1.0f );
-}
-
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-    vmathV4Copy( &result->col1, _col1 );
-    vmathV4Copy( &result->col2, _col2 );
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 mat;
-    vmathM3MakeFromQ( &mat, unitQuat );
-    vmathV4MakeFromV3Scalar( &result->col0, &mat.col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat.col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat.col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
-{
-    vmathV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
-{
-    vmathV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
-{
-    vmathV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
-{
-    vmathV4SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV4SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV4SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV4SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
-{
-    VmathVector4 tmpV3_0;
-    vmathM4GetCol( &tmpV3_0, result, col );
-    vmathV4SetElem( &tmpV3_0, row, val );
-    vmathM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
-{
-    VmathVector4 tmpV4_0;
-    vmathM4GetCol( &tmpV4_0, mat, col );
-    return vmathV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
-{
-    vmathV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
-{
-    vmathV4MakeFromElems( result, vmathV4GetElem( &mat->col0, row ), vmathV4GetElem( &mat->col1, row ), vmathV4GetElem( &mat->col2, row ), vmathV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat->col1.vec128, mat->col3.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( mat->col1.vec128, mat->col3.vec128, _VECTORMATH_SHUF_ZCWD );
-    res0 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    res1 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    res2 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    res3 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    result->col3.vec128 = res3;
-}
-
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 in0, in1, in2, in3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    vec_float4 cof0, cof1, cof2, cof3;
-    vec_float4 t0, t1, t2, t3;
-    vec_float4 t01, t02, t03, t12, t23;
-    vec_float4 t1r, t2r;
-    vec_float4 t01r, t02r, t03r, t12r, t23r;
-    vec_float4 t1r3, t1r3r;
-    vec_float4 det, det1, det2, det3, invdet;
-    in0 = mat->col0.vec128;
-    in1 = mat->col1.vec128;
-    in2 = mat->col2.vec128;
-    in3 = mat->col3.vec128;
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
-    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
-    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
-    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
-    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
-    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
-    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
-    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
-    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
-    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
-    cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
-    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
-    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
-    cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
-    cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
-
-    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
-    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
-    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
-    cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
-    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
-    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
-    cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
-    cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
-    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
-    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
-    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
-    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
-    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
-    cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
-    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
-    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
-    cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
-    cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
-    t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
-    t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
-    cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
-    cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
-    t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
-    cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
-    cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
-    t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
-    t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
-    cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
-    cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
-    t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
-    cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
-    cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
-    t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
-    t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* E0 AK MG IC */
-    cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
-    cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
-    t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
-    cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
-    cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
-    /* Compute the determinant of the matrix
-     *
-     * det = sum_across(t0 * cof0);
-     *
-     * We perform a sum across the entire vector so that
-     * we don't have to splat the result when multiplying the
-     * cofactors by the inverse of the determinant.
-     */
-    det  = spu_mul(t0, cof0);
-    det1 = spu_rlqwbyte(det, 4);
-    det2 = spu_rlqwbyte(det, 8);
-    det3 = spu_rlqwbyte(det, 12);
-    det  = spu_add(det, det1);
-    det2 = spu_add(det2, det3);
-    det  = spu_add(det, det2);
-    /* Compute the reciprocal of the determinant.
-     */
-    invdet = recipf4(det);
-    /* Multiply the cofactors by the reciprocal of the determinant.
-     */
-    result->col0.vec128 = spu_mul(cof0, invdet);
-    result->col1.vec128 = spu_mul(cof1, invdet);
-    result->col2.vec128 = spu_mul(cof2, invdet);
-    result->col3.vec128 = spu_mul(cof3, invdet);
-}
-
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3Inverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline float vmathM4Determinant( const VmathMatrix4 *mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 in0, in1, in2, in3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    vec_float4 cof0;
-    vec_float4 t0, t1, t2, t3;
-    vec_float4 t12, t23;
-    vec_float4 t1r, t2r;
-    vec_float4 t12r, t23r;
-    vec_float4 t1r3, t1r3r;
-    in0 = mat->col0.vec128;
-    in1 = mat->col1.vec128;
-    in2 = mat->col2.vec128;
-    in3 = mat->col3.vec128;
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
-    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
-    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
-    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
-    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
-    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
-    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
-    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
-    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
-    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
-    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
-    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
-
-    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
-    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
-    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
-    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
-    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
-    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
-    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
-    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
-    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
-    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
-    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
-    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
-    return spu_extract( _vmathVfDot4(t0,cof0), 0 );
-}
-
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Neg( &result->col0, &mat->col0 );
-    vmathV4Neg( &result->col1, &mat->col1 );
-    vmathV4Neg( &result->col2, &mat->col2 );
-    vmathV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz, wwww;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    wwww = spu_shuffle( vec->vec128, vec->vec128, shuffle_wwww );
-    tmp0 = spu_mul( mat->col0.vec128, xxxx );
-    tmp1 = spu_mul( mat->col1.vec128, yyyy );
-    tmp0 = spu_madd( mat->col2.vec128, zzzz, tmp0 );
-    tmp1 = spu_madd( mat->col3.vec128, wwww, tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    res = spu_mul( mat->col0.vec128, xxxx );
-    res = spu_madd( mat->col1.vec128, yyyy, res );
-    res = spu_madd( mat->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_zzzz );
-    tmp0 = spu_mul( mat->col0.vec128, xxxx );
-    tmp1 = spu_mul( mat->col1.vec128, yyyy );
-    tmp0 = spu_madd( mat->col2.vec128, zzzz, tmp0 );
-    tmp1 = spu_add( mat->col3.vec128, tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    VmathMatrix4 tmpResult;
-    vmathM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
-{
-    VmathMatrix4 tmpResult;
-    VmathPoint3 tmpP3_0;
-    vmathM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
-{
-    vmathV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( &result->col0, &mat->col0 );
-    vmathV4GetXYZ( &result->col1, &mat->col1 );
-    vmathV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    vmathV4MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV4MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ->vec128;
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    result->col0.vec128 = spu_mul( Z0, Y0 );
-    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
-    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    axis = unitVec->vec128;
-    sincosf4( spu_splats( radians ), &s, &c );
-    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
-    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
-    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    axisS = spu_mul( axis, s );
-    negAxisS = negatef4( axisS );
-    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
-    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
-    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
-    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
-    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
-    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
-    zeroW = (vec_float4)spu_maskb(0x000f);
-    axis = spu_andc( axis, zeroW );
-    result->col0.vec128 = spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 );
-    result->col1.vec128 = spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 );
-    result->col2.vec128 = spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
-{
-    VmathTransform3 tmpT3_0;
-    vmathT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    result->col0.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x00f0) );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV4ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV4ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
-{
-    VmathVector4 scale4;
-    vmathV4MakeFromV3Scalar( &scale4, scaleVec, 1.0f );
-    vmathV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
-{
-    VmathMatrix4 m4EyeFrame;
-    VmathVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathV3Normalize( &v3Y, upVec );
-    vmathP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathV3Normalize( &v3Z, &tmpV3_0 );
-    vmathV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathV3Normalize( &v3X, &tmpV3_1 );
-    vmathV3Cross( &v3Y, &v3Z, &v3X );
-    vmathV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    vec_float4 zero, col0, col1, col2, col3;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = spu_splats(0.0f);
-    col0 = zero;
-    col1 = zero;
-    col2 = zero;
-    col3 = zero;
-    col0 = spu_insert( f / aspect, col0, 0 );
-    col1 = spu_insert( f, col1, 1 );
-    col2 = spu_insert( ( zNear + zFar ) * rangeInv, col2, 2 );
-    col2 = spu_insert( -1.0f, col2, 3 );
-    col3 = spu_insert( zNear * zFar * rangeInv * 2.0f, col3, 2 );
-    result->col0.vec128 = col0;
-    result->col1.vec128 = col1;
-    result->col2.vec128 = col2;
-    result->col3.vec128 = col3;
-}
-
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff;
-    vec_float4 diagonal, column, near2;
-    vec_float4 zero = spu_splats(0.0f);
-    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
-    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
-    diff = spu_sub( rtn, lbf );
-    sum  = spu_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = spu_splats( zNear );
-    near2 = spu_add( near2, near2 );
-    diagonal = spu_mul( near2, inv_diff );
-    column = spu_mul( sum, inv_diff );
-    result->col0.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) );
-    result->col3.vec128 = spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) );
-}
-
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff, neg_inv_diff;
-    vec_float4 diagonal, column;
-    vec_float4 zero = spu_splats(0.0f);
-    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
-    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
-    diff = spu_sub( rtn, lbf );
-    sum  = spu_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = spu_add( inv_diff, inv_diff );
-    column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) );
-    result->col0.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) );
-    result->col3.vec128 = spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
-{
-    vmathV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print( const VmathMatrix4 *mat )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathM4GetRow( &tmpV4_0, mat, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathM4GetRow( &tmpV4_1, mat, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathM4GetRow( &tmpV4_2, mat, 2 );
-    vmathV4Print( &tmpV4_2 );
-    vmathM4GetRow( &tmpV4_3, mat, 3 );
-    vmathV4Print( &tmpV4_3 );
-}
-
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM4Print( mat );
-}
-
-#endif
-
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-    vmathV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
-{
-    vmathT3SetUpper3x3( result, tfrm );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 tmpM3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathT3SetUpper3x3( result, &tmpM3_0 );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV3SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
-{
-    vmathV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
-{
-    vmathV4MakeFromElems( result, vmathV3GetElem( &tfrm->col0, row ), vmathV3GetElem( &tfrm->col1, row ), vmathV3GetElem( &tfrm->col2, row ), vmathV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp2 = _vmathVfCross( tfrm->col0.vec128, tfrm->col1.vec128 );
-    tmp0 = _vmathVfCross( tfrm->col1.vec128, tfrm->col2.vec128 );
-    tmp1 = _vmathVfCross( tfrm->col2.vec128, tfrm->col0.vec128 );
-    inv3 = negatef4( tfrm->col3.vec128 );
-    dot = _vmathVfDot3( tmp2, tfrm->col2.vec128 );
-    dot = spu_shuffle( dot, dot, shuffle_xxxx );
-    invdet = recipf4( dot );
-    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
-    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
-    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
-    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
-    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
-    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
-    inv3 = spu_mul( inv0, xxxx );
-    inv3 = spu_madd( inv1, yyyy, inv3 );
-    inv3 = spu_madd( inv2, zzzz, inv3 );
-    inv0 = spu_mul( inv0, invdet );
-    inv1 = spu_mul( inv1, invdet );
-    inv2 = spu_mul( inv2, invdet );
-    inv3 = spu_mul( inv3, invdet );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-    result->col3.vec128 = inv3;
-}
-
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp0 = spu_shuffle( tfrm->col0.vec128, tfrm->col2.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( tfrm->col0.vec128, tfrm->col2.vec128, _VECTORMATH_SHUF_ZCWD );
-    inv3 = negatef4( tfrm->col3.vec128 );
-    inv0 = spu_shuffle( tmp0, tfrm->col1.vec128, _VECTORMATH_SHUF_XAYB );
-    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
-    inv1 = spu_shuffle( tmp0, tfrm->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp1, tfrm->col1.vec128, _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
-    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
-    inv3 = spu_mul( inv0, xxxx );
-    inv3 = spu_madd( inv1, yyyy, inv3 );
-    inv3 = spu_madd( inv2, zzzz, inv3 );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-    result->col3.vec128 = inv3;
-}
-
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    res = spu_mul( tfrm->col0.vec128, xxxx );
-    res = spu_madd( tfrm->col1.vec128, yyyy, res );
-    res = spu_madd( tfrm->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_zzzz );
-    tmp0 = spu_mul( tfrm->col0.vec128, xxxx );
-    tmp1 = spu_mul( tfrm->col1.vec128, yyyy );
-    tmp0 = spu_madd( tfrm->col2.vec128, zzzz, tmp0 );
-    tmp1 = spu_add( tfrm->col3.vec128, tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    VmathTransform3 tmpResult;
-    VmathPoint3 tmpP3_0, tmpP3_1;
-    vmathT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathT3Copy( result, &tmpResult );
-}
-
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathT3MakeIdentity( VmathTransform3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
-{
-    vmathM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    vmathV3MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV3MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ->vec128;
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    result->col0.vec128 = spu_mul( Z0, Y0 );
-    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
-    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    result->col0.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x00f0) );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &tfrm->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &tfrm->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &tfrm->col2, vmathV3GetZ( scaleVec ) );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print( const VmathTransform3 *tfrm )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;
-    vmathT3GetRow( &tmpV4_0, tfrm, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathT3GetRow( &tmpV4_1, tfrm, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathT3GetRow( &tmpV4_2, tfrm, 2 );
-    vmathV4Print( &tmpV4_2 );
-}
-
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
-{
-    vec_float4 res;
-    vec_float4 col0, col1, col2;
-    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
-    vec_float4 radicand, invSqrt, scale;
-    vec_float4 res0, res1, res2, res3;
-    vec_float4 xx, yy, zz;
-    vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 );
-    vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 );
-    vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 );
-    vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f );
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f);
-
-    col0 = tfrm->col0.vec128;
-    col1 = tfrm->col1.vec128;
-    col2 = tfrm->col2.vec128;
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = spu_sel( col0, col1, select_y );
-    xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
-    yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
-    zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
-
-    diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = spu_sel( col0, col1, select_z );
-    zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
-    yz_zx_xy = spu_sel( col0, col1, select_x );
-    yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
-
-    sum = spu_add( zy_xz_yx, yz_zx_xy );
-    diff = spu_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = spu_mul( invSqrt, spu_splats(0.5f) );
-    res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
-    res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
-    res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
-    res3 = diff;
-    res0 = spu_sel( res0, radicand, select_x );
-    res1 = spu_sel( res1, radicand, select_y );
-    res2 = spu_sel( res2, radicand, select_z );
-    res3 = spu_sel( res3, radicand, select_w );
-    res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) );
-    res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
-    res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
-    res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
-
-    /* determine case and select answer */
-
-    xx = spu_shuffle( col0, col0, shuffle_xxxx );
-    yy = spu_shuffle( col1, col1, shuffle_yyyy );
-    zz = spu_shuffle( col2, col2, shuffle_zzzz );
-    res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) );
-    res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) );
-    res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) );
-    result->vec128 = res;
-}
-
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
-{
-    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
-    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
-    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
-{
-    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
-    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
-    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
-    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
-}
-
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    mcol0 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_XAYB );
-    mcol1 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
-    mcol2 = spu_shuffle( tmp1, mat->col1.vec128, _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    res = spu_mul( mcol0, xxxx );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    res = spu_madd( mcol1, yyyy, res );
-    res = spu_madd( mcol2, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
-{
-    vec_float4 neg, res0, res1, res2;
-    neg = negatef4( vec->vec128 );
-    res0 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_0ZB0 );
-    res1 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_C0X0 );
-    res2 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_YA00 );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_C_H
+#define _VECTORMATH_MAT_AOS_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
+#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
+#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
+#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
+#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( &result->col0, &mat->col0 );
+    vmathV3Copy( &result->col1, &mat->col1 );
+    vmathV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
+{
+    vmathV3MakeFromScalar( &result->col0, scalar );
+    vmathV3MakeFromScalar( &result->col1, scalar );
+    vmathV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{
+    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
+    vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0);
+    xyzw_2 = spu_add( unitQuat->vec128, unitQuat->vec128 );
+    wwww = spu_shuffle( unitQuat->vec128, unitQuat->vec128, shuffle_wwww );
+    yzxw = spu_shuffle( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_SHUF_YZXW );
+    zxyw = spu_shuffle( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_SHUF_ZXYW );
+    yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW );
+    zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW );
+    tmp0 = spu_mul( yzxw_2, wwww );
+    tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) );
+    tmp2 = spu_mul( yzxw, xyzw_2 );
+    tmp0 = spu_madd( zxyw, xyzw_2, tmp0 );
+    tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 );
+    tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 );
+    tmp3 = spu_sel( tmp0, tmp1, select_x );
+    tmp4 = spu_sel( tmp1, tmp2, select_x );
+    tmp5 = spu_sel( tmp2, tmp0, select_x );
+    result->col0.vec128 = spu_sel( tmp3, tmp2, select_z );
+    result->col1.vec128 = spu_sel( tmp4, tmp0, select_z );
+    result->col2.vec128 = spu_sel( tmp5, tmp1, select_z );
+}
+
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
+{
+    vmathV3Copy( &result->col0, _col0 );
+    vmathV3Copy( &result->col1, _col1 );
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
+{
+    vmathV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
+{
+    vmathV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
+{
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
+{
+    vmathV3Copy( (&result->col0 + col), vec );
+}
+
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
+{
+    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
+    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
+    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
+{
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, result, col );
+    vmathV3SetElem( &tmpV3_0, row, val );
+    vmathM3SetCol( result, col, &tmpV3_0 );
+}
+
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
+{
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, mat, col );
+    return vmathV3GetElem( &tmpV3_0, row );
+}
+
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
+{
+    vmathV3Copy( result, (&mat->col0 + col) );
+}
+
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
+{
+    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
+}
+
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vec_float4 tmp0, tmp1, res0, res1, res2;
+    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
+    res0 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_XAYB );
+    res1 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
+    res2 = spu_shuffle( tmp1, mat->col1.vec128, _VECTORMATH_SHUF_XCY0 );
+    result->col0.vec128 = res0;
+    result->col1.vec128 = res1;
+    result->col2.vec128 = res2;
+}
+
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    tmp2 = _vmathVfCross( mat->col0.vec128, mat->col1.vec128 );
+    tmp0 = _vmathVfCross( mat->col1.vec128, mat->col2.vec128 );
+    tmp1 = _vmathVfCross( mat->col2.vec128, mat->col0.vec128 );
+    dot = _vmathVfDot3( tmp2, mat->col2.vec128 );
+    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
+    invdet = recipf4( dot );
+    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
+    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
+    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
+    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
+    inv0 = spu_mul( inv0, invdet );
+    inv1 = spu_mul( inv1, invdet );
+    inv2 = spu_mul( inv2, invdet );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+}
+
+static inline float vmathM3Determinant( const VmathMatrix3 *mat )
+{
+    VmathVector3 tmpV3_0;
+    vmathV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
+    return vmathV3Dot( &mat->col2, &tmpV3_0 );
+}
+
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Neg( &result->col0, &mat->col0 );
+    vmathV3Neg( &result->col1, &mat->col1 );
+    vmathV3Neg( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3AbsPerElem( &result->col0, &mat->col0 );
+    vmathV3AbsPerElem( &result->col1, &mat->col1 );
+    vmathV3AbsPerElem( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
+{
+    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
+    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
+}
+
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
+{   /* result = col0 * vec.x + col1 * vec.y + col2 * vec.z, evaluated on whole quadwords. */
+    vec_float4 acc;
+    vec_float4 vx, vy, vz;
+    vec_uchar16 splat_x = (vec_uchar16)spu_splats((int)0x00010203);    /* bytes 0-3 copied into every word */
+    vec_uchar16 splat_y = (vec_uchar16)spu_splats((int)0x04050607);    /* bytes 4-7 copied into every word */
+    vec_uchar16 splat_z = (vec_uchar16)spu_splats((int)0x08090a0b);    /* bytes 8-11 copied into every word */
+    vx = spu_shuffle( vec->vec128, vec->vec128, splat_x );
+    vy = spu_shuffle( vec->vec128, vec->vec128, splat_y );
+    vz = spu_shuffle( vec->vec128, vec->vec128, splat_z );
+    acc = spu_mul( mat->col0.vec128, vx );
+    acc = spu_madd( mat->col1.vec128, vy, acc );
+    acc = spu_madd( mat->col2.vec128, vz, acc );
+    result->vec128 = acc;    /* stored only after vec and mat have been read */
+}
+
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{   /* Column i of the product is mat0 applied to column i of mat1. */
+    VmathMatrix3 product;    /* scratch copy: result is written only after all columns are computed */
+    vmathM3MulV3( &product.col0, mat0, &mat1->col0 );
+    vmathM3MulV3( &product.col1, mat0, &mat1->col1 );
+    vmathM3MulV3( &product.col2, mat0, &mat1->col2 );
+    vmathM3Copy( result, &product );
+}
+
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{   /* Element-wise product (not a matrix product): vmathV3MulPerElem on each pair of matching columns. */
+    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
+{   /* Fills the columns via vmathV3MakeXAxis, vmathV3MakeYAxis and vmathV3MakeZAxis respectively. */
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
+{
+    /* Builds the rotation about X: col0 comes from vmathV3MakeXAxis,
+     * col1 = (0, cos, sin), col2 = (0, -sin, cos); unselected lanes stay zero. */
+    vec_uint4 lane_y = (vec_uint4)spu_maskb(0x0f00);    /* bytes 4-7 */
+    vec_uint4 lane_z = (vec_uint4)spu_maskb(0x00f0);    /* bytes 8-11 */
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA, yCol, zCol;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    yCol = spu_sel( zeros, cosA, lane_y );
+    yCol = spu_sel( yCol, sinA, lane_z );
+    zCol = spu_sel( zeros, negatef4(sinA), lane_y );
+    zCol = spu_sel( zCol, cosA, lane_z );
+    result->col1.vec128 = yCol;
+    result->col2.vec128 = zCol;
+    vmathV3MakeXAxis( &result->col0 );
+}
+
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
+{
+    /* Builds the rotation about Y: col0 = (cos, 0, -sin), col1 comes from
+     * vmathV3MakeYAxis, col2 = (sin, 0, cos); unselected lanes stay zero. */
+    vec_uint4 lane_x = (vec_uint4)spu_maskb(0xf000);    /* bytes 0-3 */
+    vec_uint4 lane_z = (vec_uint4)spu_maskb(0x00f0);    /* bytes 8-11 */
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA, xCol, zCol;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    xCol = spu_sel( zeros, cosA, lane_x );
+    xCol = spu_sel( xCol, negatef4(sinA), lane_z );
+    zCol = spu_sel( zeros, sinA, lane_x );
+    zCol = spu_sel( zCol, cosA, lane_z );
+    result->col0.vec128 = xCol;
+    result->col2.vec128 = zCol;
+    vmathV3MakeYAxis( &result->col1 );
+}
+
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
+{
+    /* Builds the rotation about Z: col0 = (cos, sin, 0), col1 = (-sin, cos, 0),
+     * col2 comes from vmathV3MakeZAxis; unselected lanes stay zero. */
+    vec_uint4 lane_x = (vec_uint4)spu_maskb(0xf000);    /* bytes 0-3 */
+    vec_uint4 lane_y = (vec_uint4)spu_maskb(0x0f00);    /* bytes 4-7 */
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA, xCol, yCol;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    xCol = spu_sel( zeros, cosA, lane_x );
+    xCol = spu_sel( xCol, sinA, lane_y );
+    yCol = spu_sel( zeros, negatef4(sinA), lane_x );
+    yCol = spu_sel( yCol, cosA, lane_y );
+    result->col0.vec128 = xCol;
+    result->col1.vec128 = yCol;
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
+{   /* Builds a rotation from the three angles in radiansXYZ using a single sincosf4 call for all of them. */
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);    /* copies word 0 into all four words */
+    angles = radiansXYZ->vec128;
+    angles = spu_insert( 0.0f, angles, 3 );    /* element 3 is forced to angle 0 before taking sin/cos */
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );       /* presumably (cosZ, sinZ, cos0, 0) -- confirm against the SHUF macro definitions */
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );    /* presumably (-sinZ, cosZ, -sin0, 0) -- confirm */
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );    /* presumably (cosY, cosY, -sinY, 0) -- confirm */
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );       /* presumably (sinY, sinY, cosY, 0) -- confirm */
+    X0 = spu_shuffle( s, s, shuffle_xxxx );    /* sinX in every lane */
+    X1 = spu_shuffle( c, c, shuffle_xxxx );    /* cosX in every lane */
+    tmp = spu_mul( Z0, Y1 );
+    result->col0.vec128 = spu_mul( Z0, Y0 );
+    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );     /* Z1*X1 + tmp*X0 */
+    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );    /* tmp*X1 - Z1*X0 */
+}
+
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
+{   /* Builds a rotation of `radians` about unitVec; unitVec is used as given (no normalization is performed here). */
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);    /* copies word 0 into all four words */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);    /* copies word 1 into all four words */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);    /* copies word 2 into all four words */
+    axis = unitVec->vec128;
+    sincosf4( spu_splats( radians ), &s, &c );
+    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
+    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
+    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    axisS = spu_mul( axis, s );
+    negAxisS = negatef4( axisS );
+    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );    /* presumably (0, z*s, -y*s, 0) -- confirm against the SHUF macro definitions */
+    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );    /* presumably (-z*s, 0, x*s, 0) -- confirm */
+    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );    /* presumably (y*s, -x*s, 0, 0) -- confirm */
+    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );    /* cosine goes into word 0 (bytes 0-3) */
+    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );    /* cosine goes into word 1 (bytes 4-7) */
+    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );    /* cosine goes into word 2 (bytes 8-11) */
+    result->col0.vec128 = spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 );    /* axis * axis.x * (1 - cos) + tmp0 */
+    result->col1.vec128 = spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 );    /* axis * axis.y * (1 - cos) + tmp1 */
+    result->col2.vec128 = spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 );    /* axis * axis.z * (1 - cos) + tmp2 */
+}
+
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{   /* Thin alias: forwards both arguments unchanged to vmathM3MakeFromQ. */
+    vmathM3MakeFromQ( result, unitQuat );
+}
+
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
+{   /* Diagonal matrix: each column keeps exactly one word of scaleVec and takes zero everywhere else. */
+    vec_float4 zero = spu_splats(0.0f);
+    result->col0.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0xf000) );    /* keeps word 0 (x) */
+    result->col1.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x0f00) );    /* keeps word 1 (y) */
+    result->col2.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x00f0) );    /* keeps word 2 (z) */
+}
+
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
+{   /* Scales whole columns: col0 by scaleVec x, col1 by scaleVec y, col2 by scaleVec z. */
+    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
+    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
+    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
+}
+
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
+{   /* Multiplies every column of mat element-wise by scaleVec (vmathV3MulPerElem), i.e. scales per row. */
+    vmathV3MulPerElem( &result->col0, &mat->col0, scaleVec );
+    vmathV3MulPerElem( &result->col1, &mat->col1, scaleVec );
+    vmathV3MulPerElem( &result->col2, &mat->col2, scaleVec );
+}
+
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
+{   /* Runs vmathV3Select on each column pair with the same select1; presumably nonzero picks mat1 -- confirm in vmathV3Select. */
+    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
+    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
+    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM3Print( const VmathMatrix3 *mat )
+{   /* Debug helper: fetches rows 0, 1, 2 in order and hands each one to vmathV3Print. */
+    VmathVector3 row0, row1, row2;
+    vmathM3GetRow( &row0, mat, 0 );
+    vmathV3Print( &row0 );
+    vmathM3GetRow( &row1, mat, 1 );
+    vmathV3Print( &row1 );
+    vmathM3GetRow( &row2, mat, 2 );
+    vmathV3Print( &row2 );
+}
+
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
+{   /* Debug helper: prints name followed by a colon and newline, then the matrix via vmathM3Print. */
+    printf("%s:\n", name);
+    vmathM3Print( mat );
+}
+
+#endif
+
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{   /* Copies all four columns of mat into result, one vmathV4Copy per column. */
+    vmathV4Copy( &result->col0, &mat->col0 );
+    vmathV4Copy( &result->col1, &mat->col1 );
+    vmathV4Copy( &result->col2, &mat->col2 );
+    vmathV4Copy( &result->col3, &mat->col3 );
+}
+
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
+{   /* Initializes every column with vmathV4MakeFromScalar( scalar ); this is not a scaled identity. */
+    vmathV4MakeFromScalar( &result->col0, scalar );
+    vmathV4MakeFromScalar( &result->col1, scalar );
+    vmathV4MakeFromScalar( &result->col2, scalar );
+    vmathV4MakeFromScalar( &result->col3, scalar );
+}
+
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
+{   /* Widens a 3-D transform to 4x4: each column is extended with a fourth component. */
+    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, 1.0f );    /* only col3 is extended with 1.0f */
+}
+
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
+{   /* Assembles a matrix from four column vectors, copied in order into col0..col3. */
+    vmathV4Copy( &result->col0, _col0 );
+    vmathV4Copy( &result->col1, _col1 );
+    vmathV4Copy( &result->col2, _col2 );
+    vmathV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
+{   /* Builds a 4x4 from a 3x3 block plus a translation column. */
+    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );    /* translation column is extended with 1.0f */
+}
+
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{   /* Converts unitQuat to a 3x3 with vmathM3MakeFromQ, then widens it and appends the translation column. */
+    VmathMatrix3 rot;
+    vmathM3MakeFromQ( &rot, unitQuat );
+    vmathV4MakeFromV3Scalar( &result->col0, &rot.col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &rot.col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &rot.col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );    /* fourth component 1.0f */
+}
+
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
+{   /* Overwrites column 0 of result with _col0; the other columns are untouched. */
+    vmathV4Copy( &result->col0, _col0 );
+}
+
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
+{   /* Overwrites column 1 of result with _col1; the other columns are untouched. */
+    vmathV4Copy( &result->col1, _col1 );
+}
+
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
+{   /* Overwrites column 2 of result with _col2; the other columns are untouched. */
+    vmathV4Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
+{   /* Overwrites column 3 of result with _col3; the other columns are untouched. */
+    vmathV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
+{   /* Overwrites the column at index col; col is not range-checked here. */
+    vmathV4Copy( (&result->col0 + col), vec );    /* pointer arithmetic from col0 -- assumes col0..col3 are laid out contiguously; confirm in the struct */
+}
+
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
+{   /* Writes element i of vec into element `row` of column i, for i = 0..3. */
+    vmathV4SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
+    vmathV4SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
+    vmathV4SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
+    vmathV4SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
+}
+
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
+{   /* Read-modify-write of one column: fetch column col, patch element row, store the column back. */
+    VmathVector4 column;
+    vmathM4GetCol( &column, result, col );
+    vmathV4SetElem( &column, row, val );
+    vmathM4SetCol( result, col, &column );
+}
+
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
+{   /* Returns element `row` of column `col`, going through a local copy of that column. */
+    VmathVector4 column;
+    vmathM4GetCol( &column, mat, col );
+    return vmathV4GetElem( &column, row );
+}
+
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
+{   /* Copies column 0 of mat into result. */
+    vmathV4Copy( result, &mat->col0 );
+}
+
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
+{   /* Copies column 1 of mat into result. */
+    vmathV4Copy( result, &mat->col1 );
+}
+
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
+{   /* Copies column 2 of mat into result. */
+    vmathV4Copy( result, &mat->col2 );
+}
+
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
+{   /* Copies column 3 of mat into result. */
+    vmathV4Copy( result, &mat->col3 );
+}
+
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
+{   /* Copies the column at index col into result; col is not range-checked here. */
+    vmathV4Copy( result, (&mat->col0 + col) );    /* pointer arithmetic from col0 -- assumes col0..col3 are laid out contiguously; confirm in the struct */
+}
+
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
+{   /* Gathers element `row` from col0, col1, col2, col3 (in that order) into one vector. */
+    vmathV4MakeFromElems( result, vmathV4GetElem( &mat->col0, row ), vmathV4GetElem( &mat->col1, row ), vmathV4GetElem( &mat->col2, row ), vmathV4GetElem( &mat->col3, row ) );
+}
+
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{   /* Two rounds of pairwise shuffles; result is stored only after every read of mat is done. */
+    vec_float4 xy02, xy13, zw02, zw13, row0, row1, row2, row3;
+    xy02 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
+    xy13 = spu_shuffle( mat->col1.vec128, mat->col3.vec128, _VECTORMATH_SHUF_XAYB );
+    zw02 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
+    zw13 = spu_shuffle( mat->col1.vec128, mat->col3.vec128, _VECTORMATH_SHUF_ZCWD );
+    row0 = spu_shuffle( xy02, xy13, _VECTORMATH_SHUF_XAYB );
+    row1 = spu_shuffle( xy02, xy13, _VECTORMATH_SHUF_ZCWD );
+    row2 = spu_shuffle( zw02, zw13, _VECTORMATH_SHUF_XAYB );
+    row3 = spu_shuffle( zw02, zw13, _VECTORMATH_SHUF_ZCWD );
+    result->col0.vec128 = row0;
+    result->col1.vec128 = row1;
+    result->col2.vec128 = row2;
+    result->col3.vec128 = row3;
+}
+
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{   /* General 4x4 inverse: cofactor vectors scaled by 1/det. NOTE(review): no check for det == 0 is made here. */
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 in0, in1, in2, in3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    vec_float4 cof0, cof1, cof2, cof3;
+    vec_float4 t0, t1, t2, t3;
+    vec_float4 t01, t02, t03, t12, t23;
+    vec_float4 t1r, t2r;
+    vec_float4 t01r, t02r, t03r, t12r, t23r;
+    vec_float4 t1r3, t1r3r;
+    vec_float4 det, det1, det2, det3, invdet;
+    in0 = mat->col0.vec128;
+    in1 = mat->col1.vec128;
+    in2 = mat->col2.vec128;
+    in3 = mat->col3.vec128;
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
+    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
+    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
+    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
+    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
+    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
+    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
+    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
+    /* Generate a cofactor matrix. The computed cofactors reside in
+     * cof0, cof1, cof2, cof3.
+     */
+    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
+    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
+    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
+    cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
+    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
+    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
+    cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
+    cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
+
+    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
+    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
+    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
+    cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
+    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
+    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
+    cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
+    cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
+    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
+    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
+    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
+    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
+    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
+    cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
+    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
+    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
+    cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
+    cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
+    t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
+    t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
+    cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
+    cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
+    t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
+    cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
+    cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
+    t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
+    t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
+    cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
+    cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
+    t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
+    cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
+    cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
+    t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
+    t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* EO AK MG IC */
+    cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
+    cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
+    t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
+    cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
+    cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
+    /* Compute the determinant of the matrix
+     *
+     * det = sum_across(t0 * cof0);
+     *
+     * We perform a sum across the entire vector so that
+     * we don't have to splat the result when multiplying the
+     * cofactors by the inverse of the determinant.
+     */
+    det  = spu_mul(t0, cof0);
+    det1 = spu_rlqwbyte(det, 4);
+    det2 = spu_rlqwbyte(det, 8);
+    det3 = spu_rlqwbyte(det, 12);
+    det  = spu_add(det, det1);
+    det2 = spu_add(det2, det3);
+    det  = spu_add(det, det2);
+    /* Compute the reciprocal of the determinant.
+     */
+    invdet = recipf4(det);
+    /* Multiply the cofactors by the reciprocal of the determinant.
+     */
+    result->col0.vec128 = spu_mul(cof0, invdet);
+    result->col1.vec128 = spu_mul(cof1, invdet);
+    result->col2.vec128 = spu_mul(cof2, invdet);
+    result->col3.vec128 = spu_mul(cof3, invdet);
+}
+
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{   /* Drops the fourth component of each column, inverts the resulting VmathTransform3, and widens it back to 4x4. */
+    VmathTransform3 affine, inverted;
+    VmathVector3 xyz0, xyz1, xyz2, xyz3;
+    vmathV4GetXYZ( &xyz0, &mat->col0 );
+    vmathT3SetCol0( &affine, &xyz0 );
+    vmathV4GetXYZ( &xyz1, &mat->col1 );
+    vmathT3SetCol1( &affine, &xyz1 );
+    vmathV4GetXYZ( &xyz2, &mat->col2 );
+    vmathT3SetCol2( &affine, &xyz2 );
+    vmathV4GetXYZ( &xyz3, &mat->col3 );
+    vmathT3SetCol3( &affine, &xyz3 );
+    vmathT3Inverse( &inverted, &affine );
+    vmathM4MakeFromT3( result, &inverted );
+}
+
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{   /* Same steps as the affine inverse, but the 3-D transform is inverted with vmathT3OrthoInverse. */
+    VmathTransform3 affine, inverted;
+    VmathVector3 xyz0, xyz1, xyz2, xyz3;
+    vmathV4GetXYZ( &xyz0, &mat->col0 );
+    vmathT3SetCol0( &affine, &xyz0 );
+    vmathV4GetXYZ( &xyz1, &mat->col1 );
+    vmathT3SetCol1( &affine, &xyz1 );
+    vmathV4GetXYZ( &xyz2, &mat->col2 );
+    vmathT3SetCol2( &affine, &xyz2 );
+    vmathV4GetXYZ( &xyz3, &mat->col3 );
+    vmathT3SetCol3( &affine, &xyz3 );
+    vmathT3OrthoInverse( &inverted, &affine );
+    vmathM4MakeFromT3( result, &inverted );
+}
+
+static inline float vmathM4Determinant( const VmathMatrix4 *mat )
+{   /* Returns det(mat) as the 4-element dot product of t0 with the cofactor vector cof0. */
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 in0, in1, in2, in3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    vec_float4 cof0;
+    vec_float4 t0, t1, t2, t3;
+    vec_float4 t12, t23;
+    vec_float4 t1r, t2r;
+    vec_float4 t12r, t23r;
+    vec_float4 t1r3, t1r3r;
+    in0 = mat->col0.vec128;
+    in1 = mat->col1.vec128;
+    in2 = mat->col2.vec128;
+    in3 = mat->col3.vec128;
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
+    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
+    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
+    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
+    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
+    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
+    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
+    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
+    /* Generate the first cofactor vector only. Unlike the full inverse,
+     * this function computes just cof0, which is all the determinant needs.
+     */
+    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
+    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
+    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
+    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
+    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
+
+    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
+    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
+    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
+    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
+    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
+    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
+    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
+    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
+    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
+    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
+    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
+    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
+    return spu_extract( _vmathVfDot4(t0,cof0), 0 );    /* element 0 of the dot-product vector */
+}
+
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{   /* Column-wise sum: each result column is vmathV4Add of the matching columns of mat0 and mat1. */
+    vmathV4Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV4Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV4Add( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathV4Add( &result->col3, &mat0->col3, &mat1->col3 );
+}
+
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{   /* Column-wise difference: each result column is vmathV4Sub( mat0 column, mat1 column ). */
+    vmathV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
+}
+
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{   /* Applies vmathV4Neg to each of the four columns of mat and stores them in result. */
+    vmathV4Neg( &result->col0, &mat->col0 );
+    vmathV4Neg( &result->col1, &mat->col1 );
+    vmathV4Neg( &result->col2, &mat->col2 );
+    vmathV4Neg( &result->col3, &mat->col3 );
+}
+
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{   /* Applies vmathV4AbsPerElem to each of the four columns of mat and stores them in result. */
+    vmathV4AbsPerElem( &result->col0, &mat->col0 );
+    vmathV4AbsPerElem( &result->col1, &mat->col1 );
+    vmathV4AbsPerElem( &result->col2, &mat->col2 );
+    vmathV4AbsPerElem( &result->col3, &mat->col3 );
+}
+
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
+{   /* Passes every column of mat through vmathV4ScalarMul with the same scalar. */
+    vmathV4ScalarMul( &result->col0, &mat->col0, scalar );
+    vmathV4ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathV4ScalarMul( &result->col2, &mat->col2, scalar );
+    vmathV4ScalarMul( &result->col3, &mat->col3, scalar );
+}
+
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
+{   /* result = (col0*x + col2*z) + (col1*y + col3*w), built as two independent multiply-add chains. */
+    vec_float4 sumXZ, sumYW, total;
+    vec_float4 vx, vy, vz, vw;
+    vec_uchar16 splat_x = (vec_uchar16)spu_splats((int)0x00010203);    /* bytes 0-3 copied into every word */
+    vec_uchar16 splat_y = (vec_uchar16)spu_splats((int)0x04050607);    /* bytes 4-7 */
+    vec_uchar16 splat_z = (vec_uchar16)spu_splats((int)0x08090a0b);    /* bytes 8-11 */
+    vec_uchar16 splat_w = (vec_uchar16)spu_splats((int)0x0c0d0e0f);    /* bytes 12-15 */
+    vx = spu_shuffle( vec->vec128, vec->vec128, splat_x );
+    vy = spu_shuffle( vec->vec128, vec->vec128, splat_y );
+    vz = spu_shuffle( vec->vec128, vec->vec128, splat_z );
+    vw = spu_shuffle( vec->vec128, vec->vec128, splat_w );
+    sumXZ = spu_mul( mat->col0.vec128, vx );
+    sumXZ = spu_madd( mat->col2.vec128, vz, sumXZ );
+    sumYW = spu_mul( mat->col1.vec128, vy );
+    sumYW = spu_madd( mat->col3.vec128, vw, sumYW );
+    total = spu_add( sumXZ, sumYW );
+    result->vec128 = total;
+}
+
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
+{   /* result = col0*x + col1*y + col2*z; col3 of mat does not take part. */
+    vec_float4 acc;
+    vec_float4 vx, vy, vz;
+    vec_uchar16 splat_x = (vec_uchar16)spu_splats((int)0x00010203);    /* bytes 0-3 copied into every word */
+    vec_uchar16 splat_y = (vec_uchar16)spu_splats((int)0x04050607);    /* bytes 4-7 */
+    vec_uchar16 splat_z = (vec_uchar16)spu_splats((int)0x08090a0b);    /* bytes 8-11 */
+    vx = spu_shuffle( vec->vec128, vec->vec128, splat_x );
+    vy = spu_shuffle( vec->vec128, vec->vec128, splat_y );
+    vz = spu_shuffle( vec->vec128, vec->vec128, splat_z );
+    acc = spu_mul( mat->col0.vec128, vx );
+    acc = spu_madd( mat->col1.vec128, vy, acc );
+    acc = spu_madd( mat->col2.vec128, vz, acc );
+    result->vec128 = acc;
+}
+
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
+{   /* result = (col0*x + col2*z) + (col3 + col1*y): col3 is added unscaled, as for an implicit w of 1. */
+    vec_float4 sumXZ, sumYT, total;
+    vec_float4 px, py, pz;
+    vec_uchar16 splat_x = (vec_uchar16)spu_splats((int)0x00010203);    /* bytes 0-3 copied into every word */
+    vec_uchar16 splat_y = (vec_uchar16)spu_splats((int)0x04050607);    /* bytes 4-7 */
+    vec_uchar16 splat_z = (vec_uchar16)spu_splats((int)0x08090a0b);    /* bytes 8-11 */
+    px = spu_shuffle( pnt->vec128, pnt->vec128, splat_x );
+    py = spu_shuffle( pnt->vec128, pnt->vec128, splat_y );
+    pz = spu_shuffle( pnt->vec128, pnt->vec128, splat_z );
+    sumXZ = spu_mul( mat->col0.vec128, px );
+    sumXZ = spu_madd( mat->col2.vec128, pz, sumXZ );
+    sumYT = spu_mul( mat->col1.vec128, py );
+    sumYT = spu_add( mat->col3.vec128, sumYT );
+    total = spu_add( sumXZ, sumYT );
+    result->vec128 = total;
+}
+
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{   /* Column i of the product is mat0 applied to column i of mat1. */
+    VmathMatrix4 product;    /* scratch copy: result is written only after all columns are computed */
+    vmathM4MulV4( &product.col0, mat0, &mat1->col0 );
+    vmathM4MulV4( &product.col1, mat0, &mat1->col1 );
+    vmathM4MulV4( &product.col2, mat0, &mat1->col2 );
+    vmathM4MulV4( &product.col3, mat0, &mat1->col3 );
+    vmathM4Copy( result, &product );
+}
+
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
+{   /* Multiplies mat by a 3-D transform: col0..col2 go through vmathM4MulV3, col3 through vmathM4MulP3. */
+    VmathMatrix4 product;    /* scratch copy: result is written only at the end */
+    VmathPoint3 origin;
+    vmathM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathP3MakeFromV3( &origin, &tfrm1->col3 );
+    vmathM4MulP3( &product.col3, mat, &origin );
+    vmathM4Copy( result, &product );
+}
+
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{   /* Element-wise product (not a matrix product): vmathV4MulPerElem on each pair of matching columns. */
+    vmathV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
+}
+
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
+{   /* Fills the columns via vmathV4MakeXAxis, YAxis, ZAxis and WAxis respectively. */
+    vmathV4MakeXAxis( &result->col0 );
+    vmathV4MakeYAxis( &result->col1 );
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
+{   /* Writes the columns of mat3 into col0..col2 of result via vmathV4SetXYZ; col3 is not touched here. */
+    vmathV4SetXYZ( &result->col0, &mat3->col0 );
+    vmathV4SetXYZ( &result->col1, &mat3->col1 );
+    vmathV4SetXYZ( &result->col2, &mat3->col2 );
+}
+
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
+{   /* Extracts a 3x3 by taking vmathV4GetXYZ of col0..col2; col3 of mat is ignored. */
+    vmathV4GetXYZ( &result->col0, &mat->col0 );
+    vmathV4GetXYZ( &result->col1, &mat->col1 );
+    vmathV4GetXYZ( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{   /* Writes translateVec into col3 via vmathV4SetXYZ; col0..col2 are not touched here. */
+    vmathV4SetXYZ( &result->col3, translateVec );
+}
+
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
+{   /* Reads the translation as vmathV4GetXYZ of col3. */
+    vmathV4GetXYZ( result, &mat->col3 );
+}
+
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
+{
+    /* Builds the 4x4 rotation about X: col1 = (0, cos, sin, 0), col2 = (0, -sin, cos, 0);
+     * col0 and col3 come from vmathV4MakeXAxis and vmathV4MakeWAxis. */
+    vec_uint4 lane_y = (vec_uint4)spu_maskb(0x0f00);    /* bytes 4-7 */
+    vec_uint4 lane_z = (vec_uint4)spu_maskb(0x00f0);    /* bytes 8-11 */
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA, yCol, zCol;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    yCol = spu_sel( zeros, cosA, lane_y );
+    yCol = spu_sel( yCol, sinA, lane_z );
+    zCol = spu_sel( zeros, negatef4(sinA), lane_y );
+    zCol = spu_sel( zCol, cosA, lane_z );
+    result->col1.vec128 = yCol;
+    result->col2.vec128 = zCol;
+    vmathV4MakeXAxis( &result->col0 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation about the Y axis by 'radians'.
+ * Columns written: col0 = ( cos, 0, -sin, 0 ), col1 = Y axis,
+ * col2 = ( sin, 0, cos, 0 ), col3 = W axis.
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
+{
+    const vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);   /* all-ones in word 0 */
+    const vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);   /* all-ones in word 2 */
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinv, cosv, colX, colZ;
+    sincosf4( spu_splats(radians), &sinv, &cosv );
+    colX = spu_sel( spu_sel( zeros, cosv, maskX ), negatef4(sinv), maskZ );
+    colZ = spu_sel( spu_sel( zeros, sinv, maskX ), cosv, maskZ );
+    result->col0.vec128 = colX;
+    vmathV4MakeYAxis( &result->col1 );
+    result->col2.vec128 = colZ;
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation about the Z axis by 'radians'.
+ * Columns written: col0 = ( cos, sin, 0, 0 ), col1 = ( -sin, cos, 0, 0 ),
+ * col2 = Z axis, col3 = W axis.
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
+{
+    const vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);   /* all-ones in word 0 */
+    const vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);   /* all-ones in word 1 */
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinv, cosv, colX, colY;
+    sincosf4( spu_splats(radians), &sinv, &cosv );
+    colX = spu_sel( spu_sel( zeros, cosv, maskX ), sinv, maskY );
+    colY = spu_sel( spu_sel( zeros, negatef4(sinv), maskX ), cosv, maskY );
+    result->col0.vec128 = colX;
+    result->col1.vec128 = colY;
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
+{   /* Build a 4x4 rotation matrix from the three angles (radians) stored in
+     * words 0..2 of radiansXYZ. Columns 0..2 are computed with SIMD
+     * shuffles/multiplies; column 3 is set by vmathV4MakeWAxis.
+     * NOTE(review): the _VECTORMATH_SHUF_* macros are defined outside this
+     * chunk. The per-line notes assume X/Y/Z/W pick words of the first
+     * shuffle operand, A/B/C/D words of the second, and 0 a zero word -
+     * confirm against the macro definitions. Under that assumption the
+     * columns are those of Rz * Ry * Rx. */
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); /* byte pattern 0,1,2,3 repeated: replicate word 0 */
+    angles = radiansXYZ->vec128;
+    angles = spu_insert( 0.0f, angles, 3 ); /* word 3 := 0 so its sine is 0 and cosine is 1 */
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 ); /* presumably ( cos z, sin z, cos w = 1, 0 ) */
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 ); /* presumably ( -sin z, cos z, -sin w = -0, 0 ) */
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 ); /* presumably ( cos y, cos y, -sin y, 0 ) */
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 ); /* presumably ( sin y, sin y, cos y, 0 ) */
+    X0 = spu_shuffle( s, s, shuffle_xxxx ); /* sin x in every word */
+    X1 = spu_shuffle( c, c, shuffle_xxxx ); /* cos x in every word */
+    tmp = spu_mul( Z0, Y1 );
+    result->col0.vec128 = spu_mul( Z0, Y0 );
+    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) ); /* Z1*X1 + tmp*X0 */
+    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ); /* tmp*X1 - Z1*X0 */
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
+{   /* Build a 4x4 rotation of 'radians' about the axis unitVec.
+     * Each of columns 0..2 is computed as
+     *     axis * axis[k] * (1 - cos) + tmpK
+     * where tmpK carries cos in slot k and +/- axis*sin terms elsewhere.
+     * Column 3 is set by vmathV4MakeWAxis.
+     * The parameter name says unitVec; nothing here normalizes it.
+     * NOTE(review): the exact slot layout of tmp0..tmp2 depends on the
+     * _VECTORMATH_SHUF_* macros, which are defined outside this chunk. */
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); /* replicate word 0 */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); /* replicate word 1 */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); /* replicate word 2 */
+    axis = unitVec->vec128;
+    sincosf4( spu_splats( radians ), &s, &c );
+    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
+    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
+    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    axisS = spu_mul( axis, s );
+    negAxisS = negatef4( axisS );
+    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
+    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
+    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
+    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) ); /* cos into word 0 */
+    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) ); /* cos into word 1 */
+    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) ); /* cos into word 2 */
+    zeroW = (vec_float4)spu_maskb(0x000f); /* all-ones bit pattern in word 3 only */
+    axis = spu_andc( axis, zeroW ); /* clear word 3 of axis so the products below contribute 0 there */
+    result->col0.vec128 = spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 );
+    result->col1.vec128 = spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 );
+    result->col2.vec128 = spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix from unitQuat in two steps: first a VmathTransform3
+ * via vmathT3MakeRotationQ, then conversion with vmathM4MakeFromT3.
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
+{
+    VmathTransform3 rotation;
+    vmathT3MakeRotationQ( &rotation, unitQuat );
+    vmathM4MakeFromT3( result, &rotation );
+}
+
+/*
+ * Build a 4x4 scale matrix: column k (k = 0, 1, 2) is zero except for word
+ * k, which is taken from the same word of scaleVec; column 3 is the W axis.
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
+{
+    const vec_float4 zeros = spu_splats(0.0f);
+    const vec_uint4 keepX = (vec_uint4)spu_maskb(0xf000);
+    const vec_uint4 keepY = (vec_uint4)spu_maskb(0x0f00);
+    const vec_uint4 keepZ = (vec_uint4)spu_maskb(0x00f0);
+    result->col0.vec128 = spu_sel( zeros, scaleVec->vec128, keepX );
+    result->col1.vec128 = spu_sel( zeros, scaleVec->vec128, keepY );
+    result->col2.vec128 = spu_sel( zeros, scaleVec->vec128, keepZ );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * result = mat with columns 0, 1 and 2 multiplied by the x, y and z
+ * components of scaleVec respectively (vmathV4ScalarMul); column 3 is
+ * copied unchanged. Each component is fetched immediately before use.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
+{
+    float factor;
+    factor = vmathV3GetX( scaleVec );
+    vmathV4ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV4ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV4ScalarMul( &result->col2, &mat->col2, factor );
+    vmathV4Copy( &result->col3, &mat->col3 );
+}
+
+/*
+ * result = mat with every column multiplied element-wise by
+ * ( scaleVec, 1.0f ), i.e. the scale extended to four components with 1.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
+{
+    VmathVector4 rowScale;
+    VmathMatrix4 *dst = result;
+    const VmathMatrix4 *src = mat;
+    vmathV4MakeFromV3Scalar( &rowScale, scaleVec, 1.0f );
+    vmathV4MulPerElem( &dst->col0, &src->col0, &rowScale );
+    vmathV4MulPerElem( &dst->col1, &src->col1, &rowScale );
+    vmathV4MulPerElem( &dst->col2, &src->col2, &rowScale );
+    vmathV4MulPerElem( &dst->col3, &src->col3, &rowScale );
+}
+
+/*
+ * Build a 4x4 translation matrix: columns 0..2 are the X, Y and Z axes and
+ * column 3 is ( translateVec, 1.0f ).
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    VmathMatrix4 *dst = result;
+    vmathV4MakeXAxis( &dst->col0 );
+    vmathV4MakeYAxis( &dst->col1 );
+    vmathV4MakeZAxis( &dst->col2 );
+    vmathV4MakeFromV3Scalar( &dst->col3, translateVec, 1.0f );
+}
+
+/*
+ * Build a viewing matrix for an eye at eyePos looking toward lookAtPos.
+ * An eye frame is assembled from three axes and the eye position, then
+ * result receives vmathM4OrthoInverse of that frame:
+ *   back  = normalize( eyePos - lookAtPos )
+ *   right = normalize( normalize( upVec ) x back )
+ *   up    = back x right
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
+{
+    VmathMatrix4 eyeFrame;
+    VmathVector3 right, up, back, eyeMinusTarget, upCrossBack;
+    VmathVector4 frameCol0, frameCol1, frameCol2, frameCol3;
+
+    /* orthonormal basis of the eye frame */
+    vmathV3Normalize( &up, upVec );
+    vmathP3Sub( &eyeMinusTarget, eyePos, lookAtPos );
+    vmathV3Normalize( &back, &eyeMinusTarget );
+    vmathV3Cross( &upCrossBack, &up, &back );
+    vmathV3Normalize( &right, &upCrossBack );
+    vmathV3Cross( &up, &back, &right );
+
+    /* widen to four components and assemble the frame */
+    vmathV4MakeFromV3( &frameCol0, &right );
+    vmathV4MakeFromV3( &frameCol1, &up );
+    vmathV4MakeFromV3( &frameCol2, &back );
+    vmathV4MakeFromP3( &frameCol3, eyePos );
+    vmathM4MakeFromCols( &eyeFrame, &frameCol0, &frameCol1, &frameCol2, &frameCol3 );
+
+    vmathM4OrthoInverse( result, &eyeFrame );
+}
+
+/*
+ * Build a perspective projection matrix.
+ * With f = tanf( pi/2 - fovyRadians/2 ) and rangeInv = 1 / ( zNear - zFar ):
+ *   col0 = ( f / aspect, 0, 0, 0 )
+ *   col1 = ( 0, f, 0, 0 )
+ *   col2 = ( 0, 0, ( zNear + zFar ) * rangeInv, -1 )
+ *   col3 = ( 0, 0, zNear * zFar * rangeInv * 2, 0 )
+ * No argument validation is performed (aspect == 0 or zNear == zFar divide
+ * by zero).
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
+{
+    const float cotHalfFovy = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    const float invDepth = 1.0f / ( zNear - zFar );
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 depthCol;
+
+    depthCol = spu_insert( ( zNear + zFar ) * invDepth, zeros, 2 );
+    depthCol = spu_insert( -1.0f, depthCol, 3 );
+
+    result->col0.vec128 = spu_insert( cotHalfFovy / aspect, zeros, 0 );
+    result->col1.vec128 = spu_insert( cotHalfFovy, zeros, 1 );
+    result->col2.vec128 = depthCol;
+    result->col3.vec128 = spu_insert( zNear * zFar * invDepth * 2.0f, zeros, 2 );
+}
+
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{   /* Build a perspective-frustum projection matrix from the six plane
+     * values. The planes are packed into two vectors so that the three
+     * differences, sums and reciprocals are computed in one SIMD pass.
+     * NOTE(review): lane notes assume _VECTORMATH_SHUF_XAYB (defined outside
+     * this chunk) interleaves words as ( x0, x1, y0, y1 ) - confirm.
+     * Under that assumption the result is
+     *   col0 = ( 2n/(r-l), 0, 0, 0 )
+     *   col1 = ( 0, 2n/(t-b), 0, 0 )
+     *   col2 = ( (r+l)/(r-l), (t+b)/(t-b), (n+f)/(n-f), -1 )
+     *   col3 = ( 0, 0, 2nf/(n-f), 0 )
+     * No check is made for coincident planes (zero differences). */
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff;
+    vec_float4 diagonal, column, near2;
+    vec_float4 zero = spu_splats(0.0f);
+    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
+    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB ); /* presumably ( left, bottom, zFar, - ) */
+    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB ); /* presumably ( right, top, zNear, - ) */
+    diff = spu_sub( rtn, lbf );
+    sum  = spu_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    near2 = spu_splats( zNear );
+    near2 = spu_add( near2, near2 ); /* 2 * zNear in every word */
+    diagonal = spu_mul( near2, inv_diff );
+    column = spu_mul( sum, inv_diff );
+    result->col0.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ); /* keep word 0 of diagonal */
+    result->col1.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ); /* keep word 1 of diagonal */
+    result->col2.vec128 = spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) ); /* word 3 := -1 */
+    result->col3.vec128 = spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) ); /* keep word 2 of diagonal*zFar */
+}
+
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{   /* Build an orthographic projection matrix from the six plane values,
+     * using the same packed-vector scheme as vmathM4MakeFrustum.
+     * NOTE(review): lane notes assume _VECTORMATH_SHUF_XAYB (defined outside
+     * this chunk) interleaves words as ( x0, x1, y0, y1 ) - confirm.
+     * Under that assumption the result is
+     *   col0 = ( 2/(r-l), 0, 0, 0 )
+     *   col1 = ( 0, 2/(t-b), 0, 0 )
+     *   col2 = ( 0, 0, 2/(n-f), 0 )
+     *   col3 = ( -(r+l)/(r-l), -(t+b)/(t-b), (n+f)/(n-f), 1 )
+     * No check is made for coincident planes (zero differences). */
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff, neg_inv_diff;
+    vec_float4 diagonal, column;
+    vec_float4 zero = spu_splats(0.0f);
+    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
+    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB ); /* presumably ( left, bottom, zFar, - ) */
+    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB ); /* presumably ( right, top, zNear, - ) */
+    diff = spu_sub( rtn, lbf );
+    sum  = spu_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    diagonal = spu_add( inv_diff, inv_diff ); /* 2 / diff */
+    column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) ); /* negated in words 0,1,3; positive in word 2 */
+    result->col0.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ); /* keep word 0 of diagonal */
+    result->col1.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ); /* keep word 1 of diagonal */
+    result->col2.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) ); /* keep word 2 of diagonal */
+    result->col3.vec128 = spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) ); /* word 3 := 1 */
+}
+
+/*
+ * Column-wise select: every column of result is produced by vmathV4Select
+ * from the matching columns of mat0 and mat1 with the same select1 flag.
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
+{
+    VmathMatrix4 *dst = result;
+    const VmathMatrix4 *first = mat0;
+    const VmathMatrix4 *second = mat1;
+    vmathV4Select( &dst->col0, &first->col0, &second->col0, select1 );
+    vmathV4Select( &dst->col1, &first->col1, &second->col1, select1 );
+    vmathV4Select( &dst->col2, &first->col2, &second->col2, select1 );
+    vmathV4Select( &dst->col3, &first->col3, &second->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: fetch rows 0..3 of mat with vmathM4GetRow and print each
+ * with vmathV4Print, in that order.
+ */
+static inline void vmathM4Print( const VmathMatrix4 *mat )
+{
+    VmathVector4 rowVec;
+    int row;
+    for ( row = 0; row < 4; row++ ) {
+        vmathM4GetRow( &rowVec, mat, row );
+        vmathV4Print( &rowVec );
+    }
+}
+
+/* Debug helper: print "<name>:" on its own line, then the matrix rows. */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
+{
+    printf("%s:\n", name);
+    vmathM4Print( mat );
+}
+
+#endif
+
+/* Copy all four columns of tfrm into result, one vmathV3Copy per column. */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    VmathTransform3 *dst = result;
+    const VmathTransform3 *src = tfrm;
+    vmathV3Copy( &dst->col0, &src->col0 );
+    vmathV3Copy( &dst->col1, &src->col1 );
+    vmathV3Copy( &dst->col2, &src->col2 );
+    vmathV3Copy( &dst->col3, &src->col3 );
+}
+
+/* Set every column of result with vmathV3MakeFromScalar( ..., scalar ). */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
+{
+    VmathTransform3 *dst = result;
+    const float fill = scalar;
+    vmathV3MakeFromScalar( &dst->col0, fill );
+    vmathV3MakeFromScalar( &dst->col1, fill );
+    vmathV3MakeFromScalar( &dst->col2, fill );
+    vmathV3MakeFromScalar( &dst->col3, fill );
+}
+
+/* Assemble a transform from four column vectors, copied in order 0..3. */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
+{
+    VmathTransform3 *dst = result;
+    vmathV3Copy( &dst->col0, _col0 );
+    vmathV3Copy( &dst->col1, _col1 );
+    vmathV3Copy( &dst->col2, _col2 );
+    vmathV3Copy( &dst->col3, _col3 );
+}
+
+/*
+ * Assemble a transform from a 3x3 matrix and a translation vector:
+ * columns 0..2 come from tfrm, column 3 from translateVec. The copies are
+ * written out directly; they are the same vmathV3Copy calls that
+ * vmathT3SetUpper3x3 followed by vmathT3SetTranslation perform.
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
+{
+    vmathV3Copy( &result->col0, &tfrm->col0 );
+    vmathV3Copy( &result->col1, &tfrm->col1 );
+    vmathV3Copy( &result->col2, &tfrm->col2 );
+    vmathV3Copy( &result->col3, translateVec );
+}
+
+/*
+ * Assemble a transform from a quaternion and a translation vector: the
+ * quaternion is converted with vmathM3MakeFromQ and stored as the upper
+ * 3x3, then translateVec is stored as the translation column.
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rotation;
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathT3SetUpper3x3( result, &rotation );
+    vmathT3SetTranslation( result, translateVec );
+}
+
+/* Overwrite column 0 of result with *_col0 (vmathV3Copy). */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
+{
+    VmathVector3 *dstCol = &result->col0;
+    vmathV3Copy( dstCol, _col0 );
+}
+
+/* Overwrite column 1 of result with *_col1 (vmathV3Copy). */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
+{
+    VmathVector3 *dstCol = &result->col1;
+    vmathV3Copy( dstCol, _col1 );
+}
+
+/* Overwrite column 2 of result with *_col2 (vmathV3Copy). */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
+{
+    VmathVector3 *dstCol = &result->col2;
+    vmathV3Copy( dstCol, _col2 );
+}
+
+/* Overwrite column 3 of result with *_col3 (vmathV3Copy). */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
+{
+    VmathVector3 *dstCol = &result->col3;
+    vmathV3Copy( dstCol, _col3 );
+}
+
+/*
+ * Overwrite the column selected by 'col' with *vec. The column is located
+ * by pointer arithmetic from col0; 'col' is not range-checked.
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
+{
+    VmathVector3 *dstCol = &result->col0 + col;
+    vmathV3Copy( dstCol, vec );
+}
+
+/*
+ * Overwrite row 'row' of result: element k of vec (k = 0..3) is stored at
+ * index 'row' of column k. Each element is fetched right before its store.
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
+{
+    float elem;
+    elem = vmathV4GetElem( vec, 0 );
+    vmathV3SetElem( &result->col0, row, elem );
+    elem = vmathV4GetElem( vec, 1 );
+    vmathV3SetElem( &result->col1, row, elem );
+    elem = vmathV4GetElem( vec, 2 );
+    vmathV3SetElem( &result->col2, row, elem );
+    elem = vmathV4GetElem( vec, 3 );
+    vmathV3SetElem( &result->col3, row, elem );
+}
+
+/*
+ * Set a single element: read column 'col' into a local copy, change its
+ * 'row' element to val, and write the column back.
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
+{
+    VmathVector3 column;
+    vmathT3GetCol( &column, result, col );
+    vmathV3SetElem( &column, row, val );
+    vmathT3SetCol( result, col, &column );
+}
+
+/* Return element 'row' of column 'col', read through a local column copy. */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
+{
+    VmathVector3 column;
+    float elem;
+    vmathT3GetCol( &column, tfrm, col );
+    elem = vmathV3GetElem( &column, row );
+    return elem;
+}
+
+/* Copy column 0 of tfrm into result (vmathV3Copy). */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *srcCol = &tfrm->col0;
+    vmathV3Copy( result, srcCol );
+}
+
+/* Copy column 1 of tfrm into result (vmathV3Copy). */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *srcCol = &tfrm->col1;
+    vmathV3Copy( result, srcCol );
+}
+
+/* Copy column 2 of tfrm into result (vmathV3Copy). */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *srcCol = &tfrm->col2;
+    vmathV3Copy( result, srcCol );
+}
+
+/* Copy column 3 of tfrm into result (vmathV3Copy). */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *srcCol = &tfrm->col3;
+    vmathV3Copy( result, srcCol );
+}
+
+/*
+ * Copy the column selected by 'col' into result. The column is located by
+ * pointer arithmetic from col0; 'col' is not range-checked.
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
+{
+    const VmathVector3 *srcCol = &tfrm->col0 + col;
+    vmathV3Copy( result, srcCol );
+}
+
+/*
+ * Gather row 'row' of tfrm into a 4-vector: element k of the result is
+ * element 'row' of column k (k = 0..3).
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
+{
+    const float e0 = vmathV3GetElem( &tfrm->col0, row );
+    const float e1 = vmathV3GetElem( &tfrm->col1, row );
+    const float e2 = vmathV3GetElem( &tfrm->col2, row );
+    const float e3 = vmathV3GetElem( &tfrm->col3, row );
+    vmathV4MakeFromElems( result, e0, e1, e2, e3 );
+}
+
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{   /* General inverse of a 3x4 transform.
+     * The upper 3x3 is inverted from the cross products of its columns
+     * divided by the triple product; the new translation is the inverted
+     * 3x3 applied to the negated original translation.
+     * All inputs are read before result is written.
+     * There is no check for a zero triple product (singular upper 3x3).
+     * NOTE(review): the lane comments assume the _VECTORMATH_SHUF_* macros
+     * (defined outside this chunk) use X/Y/Z/W for first-operand words,
+     * A/B/C/D for second-operand words and 0 for zero - confirm. */
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); /* replicate word 0 */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); /* replicate word 1 */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); /* replicate word 2 */
+    tmp2 = _vmathVfCross( tfrm->col0.vec128, tfrm->col1.vec128 ); /* col0 x col1 */
+    tmp0 = _vmathVfCross( tfrm->col1.vec128, tfrm->col2.vec128 ); /* col1 x col2 */
+    tmp1 = _vmathVfCross( tfrm->col2.vec128, tfrm->col0.vec128 ); /* col2 x col0 */
+    inv3 = negatef4( tfrm->col3.vec128 ); /* -translation */
+    dot = _vmathVfDot3( tmp2, tfrm->col2.vec128 ); /* (col0 x col1) . col2 */
+    dot = spu_shuffle( dot, dot, shuffle_xxxx ); /* broadcast word 0 of the dot result */
+    invdet = recipf4( dot );
+    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
+    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
+    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB ); /* presumably ( tmp0.x, tmp1.x, tmp2.x, - ) */
+    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
+    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 ); /* presumably ( tmp0.y, tmp1.y, tmp2.y, 0 ) */
+    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 ); /* presumably ( tmp0.z, tmp1.z, tmp2.z, 0 ) */
+    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
+    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
+    inv3 = spu_mul( inv0, xxxx ); /* accumulate inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z) */
+    inv3 = spu_madd( inv1, yyyy, inv3 );
+    inv3 = spu_madd( inv2, zzzz, inv3 );
+    inv0 = spu_mul( inv0, invdet ); /* scale all four columns by 1/det */
+    inv1 = spu_mul( inv1, invdet );
+    inv2 = spu_mul( inv2, invdet );
+    inv3 = spu_mul( inv3, invdet );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+    result->col3.vec128 = inv3;
+}
+
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{   /* Inverse of a transform computed without any division: the upper 3x3
+     * is rearranged by shuffles (a transpose, see note) and the new
+     * translation is that rearranged 3x3 applied to the negated original
+     * translation. This matches the true inverse only when the upper 3x3 is
+     * orthonormal, which the function name implies but the code does not
+     * check. All inputs are read before result is written.
+     * NOTE(review): the lane comments assume the _VECTORMATH_SHUF_* macros
+     * (defined outside this chunk) use X/Y/Z/W for first-operand words,
+     * A/B/C/D for second-operand words and 0 for zero - confirm. */
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); /* replicate word 0 */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); /* replicate word 1 */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); /* replicate word 2 */
+    tmp0 = spu_shuffle( tfrm->col0.vec128, tfrm->col2.vec128, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( tfrm->col0.vec128, tfrm->col2.vec128, _VECTORMATH_SHUF_ZCWD );
+    inv3 = negatef4( tfrm->col3.vec128 ); /* -translation */
+    inv0 = spu_shuffle( tmp0, tfrm->col1.vec128, _VECTORMATH_SHUF_XAYB ); /* presumably ( col0.x, col1.x, col2.x, - ) */
+    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
+    inv1 = spu_shuffle( tmp0, tfrm->col1.vec128, _VECTORMATH_SHUF_ZBW0 ); /* presumably ( col0.y, col1.y, col2.y, 0 ) */
+    inv2 = spu_shuffle( tmp1, tfrm->col1.vec128, _VECTORMATH_SHUF_XCY0 ); /* presumably ( col0.z, col1.z, col2.z, 0 ) */
+    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
+    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
+    inv3 = spu_mul( inv0, xxxx ); /* accumulate inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z) */
+    inv3 = spu_madd( inv1, yyyy, inv3 );
+    inv3 = spu_madd( inv2, zzzz, inv3 );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+    result->col3.vec128 = inv3;
+}
+
+/* Apply vmathV3AbsPerElem to each of the four columns of tfrm. */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    VmathTransform3 *dst = result;
+    const VmathTransform3 *src = tfrm;
+    vmathV3AbsPerElem( &dst->col0, &src->col0 );
+    vmathV3AbsPerElem( &dst->col1, &src->col1 );
+    vmathV3AbsPerElem( &dst->col2, &src->col2 );
+    vmathV3AbsPerElem( &dst->col3, &src->col3 );
+}
+
+/*
+ * Transform a direction vector by the upper 3x3 of tfrm:
+ *   result = col0 * vec.x + col1 * vec.y + col2 * vec.z
+ * The translation column (col3) is not used. The sum is accumulated in a
+ * local and stored last, so result may be the same object as vec.
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
+{
+    const vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    const vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    const vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    const vec_float4 v = vec->vec128;
+    vec_float4 acc;
+    acc = spu_mul( tfrm->col0.vec128, spu_shuffle( v, v, splatX ) );
+    acc = spu_madd( tfrm->col1.vec128, spu_shuffle( v, v, splatY ), acc );
+    acc = spu_madd( tfrm->col2.vec128, spu_shuffle( v, v, splatZ ), acc );
+    result->vec128 = acc;
+}
+
+/*
+ * Transform a point by tfrm, including the translation column:
+ *   result = ( col0 * pnt.x + col2 * pnt.z ) + ( col3 + col1 * pnt.y )
+ * The two partial sums are kept separate and combined at the end, in the
+ * grouping shown. The result is stored last, so result may be the same
+ * object as pnt.
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
+{
+    const vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    const vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    const vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    const vec_float4 p = pnt->vec128;
+    vec_float4 sumXZ, sumYT;
+    sumXZ = spu_mul( tfrm->col0.vec128, spu_shuffle( p, p, splatX ) );
+    sumYT = spu_mul( tfrm->col1.vec128, spu_shuffle( p, p, splatY ) );
+    sumXZ = spu_madd( tfrm->col2.vec128, spu_shuffle( p, p, splatZ ), sumXZ );
+    sumYT = spu_add( tfrm->col3.vec128, sumYT );
+    result->vec128 = spu_add( sumXZ, sumYT );
+}
+
+/*
+ * Compose two transforms (tfrm0 applied after tfrm1). Columns 0..2 of
+ * tfrm1 go through vmathT3MulV3; column 3 is treated as a point and goes
+ * through vmathT3MulP3. The product is built in a local and copied out
+ * last, so result may be the same object as either input.
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{
+    VmathTransform3 product;
+    VmathPoint3 originIn, originOut;
+
+    /* rotation/scale part */
+    vmathT3MulV3( &product.col0, tfrm0, &tfrm1->col0 );
+    vmathT3MulV3( &product.col1, tfrm0, &tfrm1->col1 );
+    vmathT3MulV3( &product.col2, tfrm0, &tfrm1->col2 );
+
+    /* translation part, transformed as a point */
+    vmathP3MakeFromV3( &originIn, &tfrm1->col3 );
+    vmathT3MulP3( &originOut, tfrm0, &originIn );
+    vmathV3MakeFromP3( &product.col3, &originOut );
+
+    vmathT3Copy( result, &product );
+}
+
+/*
+ * Column-by-column element-wise product: each column of result receives
+ * vmathV3MulPerElem of the matching columns of tfrm0 and tfrm1.
+ */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{
+    VmathTransform3 *dst = result;
+    const VmathTransform3 *lhs = tfrm0;
+    const VmathTransform3 *rhs = tfrm1;
+    vmathV3MulPerElem( &dst->col0, &lhs->col0, &rhs->col0 );
+    vmathV3MulPerElem( &dst->col1, &lhs->col1, &rhs->col1 );
+    vmathV3MulPerElem( &dst->col2, &lhs->col2, &rhs->col2 );
+    vmathV3MulPerElem( &dst->col3, &lhs->col3, &rhs->col3 );
+}
+
+/*
+ * Fill result with the identity transform: columns 0..2 are the X, Y and Z
+ * axes and column 3 (translation) is all zeros. The four stores are
+ * independent of one another, so they are issued last column first.
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result )
+{
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeXAxis( &result->col0 );
+}
+
+/*
+ * Copy the three columns of the 3x3 matrix tfrm into col0..col2 of result.
+ * result->col3 is not written.
+ */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
+{
+    VmathTransform3 *dst = result;
+    const VmathMatrix3 *src = tfrm;
+    vmathV3Copy( &dst->col0, &src->col0 );
+    vmathV3Copy( &dst->col1, &src->col1 );
+    vmathV3Copy( &dst->col2, &src->col2 );
+}
+
+/* Build a 3x3 matrix from col0..col2 of tfrm via vmathM3MakeFromCols. */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *c0 = &tfrm->col0;
+    const VmathVector3 *c1 = &tfrm->col1;
+    const VmathVector3 *c2 = &tfrm->col2;
+    vmathM3MakeFromCols( result, c0, c1, c2 );
+}
+
+/* Overwrite the translation column (col3) of result with translateVec. */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{
+    VmathVector3 *translationCol = &result->col3;
+    vmathV3Copy( translationCol, translateVec );
+}
+
+/* Copy the translation column (col3) of tfrm into result. */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *translationCol = &tfrm->col3;
+    vmathV3Copy( result, translationCol );
+}
+
+/*
+ * Build a transform that rotates about the X axis by 'radians'.
+ * Columns written: col0 = X axis, col1 = ( 0, cos, sin, 0 ),
+ * col2 = ( 0, -sin, cos, 0 ), col3 = zero translation.
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
+{
+    const vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);   /* all-ones in word 1 */
+    const vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);   /* all-ones in word 2 */
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinv, cosv, colY, colZ;
+    sincosf4( spu_splats(radians), &sinv, &cosv );
+    colY = spu_sel( spu_sel( zeros, cosv, maskY ), sinv, maskZ );
+    colZ = spu_sel( spu_sel( zeros, negatef4(sinv), maskY ), cosv, maskZ );
+    vmathV3MakeXAxis( &result->col0 );
+    result->col1.vec128 = colY;
+    result->col2.vec128 = colZ;
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a transform that rotates about the Y axis by 'radians'.
+ * Columns written: col0 = ( cos, 0, -sin, 0 ), col1 = Y axis,
+ * col2 = ( sin, 0, cos, 0 ), col3 = zero translation.
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
+{
+    const vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);   /* all-ones in word 0 */
+    const vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);   /* all-ones in word 2 */
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinv, cosv, colX, colZ;
+    sincosf4( spu_splats(radians), &sinv, &cosv );
+    colX = spu_sel( spu_sel( zeros, cosv, maskX ), negatef4(sinv), maskZ );
+    colZ = spu_sel( spu_sel( zeros, sinv, maskX ), cosv, maskZ );
+    result->col0.vec128 = colX;
+    vmathV3MakeYAxis( &result->col1 );
+    result->col2.vec128 = colZ;
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a transform that rotates about the Z axis by 'radians'.
+ * Columns written: col0 = ( cos, sin, 0, 0 ), col1 = ( -sin, cos, 0, 0 ),
+ * col2 = Z axis, col3 = zero translation.
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
+{
+    const vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);   /* all-ones in word 0 */
+    const vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);   /* all-ones in word 1 */
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinv, cosv, colX, colY;
+    sincosf4( spu_splats(radians), &sinv, &cosv );
+    colX = spu_sel( spu_sel( zeros, cosv, maskX ), sinv, maskY );
+    colY = spu_sel( spu_sel( zeros, negatef4(sinv), maskX ), cosv, maskY );
+    result->col0.vec128 = colX;
+    result->col1.vec128 = colY;
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
+{   /* Build a rotation transform from the three angles (radians) stored in
+     * words 0..2 of radiansXYZ. Columns 0..2 are computed with SIMD
+     * shuffles/multiplies; the translation column is set to zero.
+     * NOTE(review): the _VECTORMATH_SHUF_* macros are defined outside this
+     * chunk. The per-line notes assume X/Y/Z/W pick words of the first
+     * shuffle operand, A/B/C/D words of the second, and 0 a zero word -
+     * confirm against the macro definitions. Under that assumption the
+     * columns are those of Rz * Ry * Rx. */
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); /* byte pattern 0,1,2,3 repeated: replicate word 0 */
+    angles = radiansXYZ->vec128;
+    angles = spu_insert( 0.0f, angles, 3 ); /* word 3 := 0 so its sine is 0 and cosine is 1 */
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 ); /* presumably ( cos z, sin z, cos w = 1, 0 ) */
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 ); /* presumably ( -sin z, cos z, -sin w = -0, 0 ) */
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 ); /* presumably ( cos y, cos y, -sin y, 0 ) */
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 ); /* presumably ( sin y, sin y, cos y, 0 ) */
+    X0 = spu_shuffle( s, s, shuffle_xxxx ); /* sin x in every word */
+    X1 = spu_shuffle( c, c, shuffle_xxxx ); /* cos x in every word */
+    tmp = spu_mul( Z0, Y1 );
+    result->col0.vec128 = spu_mul( Z0, Y0 );
+    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) ); /* Z1*X1 + tmp*X0 */
+    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ); /* tmp*X1 - Z1*X0 */
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a transform that rotates by 'radians' about unitVec: the 3x3 part
+ * comes from vmathM3MakeRotationAxis and the translation is all zeros.
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
+{
+    VmathMatrix3 rotation;
+    VmathVector3 noTranslation;
+    vmathM3MakeRotationAxis( &rotation, radians, unitVec );
+    vmathV3MakeFromScalar( &noTranslation, 0.0f );
+    vmathT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+/*
+ * Build a rotation transform from unitQuat: the 3x3 part comes from
+ * vmathM3MakeFromQ and the translation is all zeros.
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
+{
+    VmathMatrix3 rotation;
+    VmathVector3 noTranslation;
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathV3MakeFromScalar( &noTranslation, 0.0f );
+    vmathT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+/*
+ * Build a scale transform: column k (k = 0, 1, 2) is zero except for word
+ * k, which is taken from the same word of scaleVec; the translation column
+ * is all zeros.
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
+{
+    const vec_float4 zeros = spu_splats(0.0f);
+    const vec_uint4 keepX = (vec_uint4)spu_maskb(0xf000);
+    const vec_uint4 keepY = (vec_uint4)spu_maskb(0x0f00);
+    const vec_uint4 keepZ = (vec_uint4)spu_maskb(0x00f0);
+    result->col0.vec128 = spu_sel( zeros, scaleVec->vec128, keepX );
+    result->col1.vec128 = spu_sel( zeros, scaleVec->vec128, keepY );
+    result->col2.vec128 = spu_sel( zeros, scaleVec->vec128, keepZ );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * result = tfrm with columns 0, 1 and 2 multiplied by the x, y and z
+ * components of scaleVec respectively (vmathV3ScalarMul); column 3 is
+ * copied unchanged. Each component is fetched immediately before use.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
+{
+    float factor;
+    factor = vmathV3GetX( scaleVec );
+    vmathV3ScalarMul( &result->col0, &tfrm->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV3ScalarMul( &result->col1, &tfrm->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV3ScalarMul( &result->col2, &tfrm->col2, factor );
+    vmathV3Copy( &result->col3, &tfrm->col3 );
+}
+
+/*
+ * result = tfrm with every column, including the translation column,
+ * multiplied element-wise by scaleVec (vmathV3MulPerElem).
+ */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
+{
+    VmathTransform3 *dst = result;
+    const VmathTransform3 *src = tfrm;
+    vmathV3MulPerElem( &dst->col0, &src->col0, scaleVec );
+    vmathV3MulPerElem( &dst->col1, &src->col1, scaleVec );
+    vmathV3MulPerElem( &dst->col2, &src->col2, scaleVec );
+    vmathV3MulPerElem( &dst->col3, &src->col3, scaleVec );
+}
+
+/*
+ * Build a pure translation: columns 0..2 are the X, Y and Z axes and
+ * column 3 is a copy of translateVec.
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{
+    VmathTransform3 *dst = result;
+    vmathV3MakeXAxis( &dst->col0 );
+    vmathV3MakeYAxis( &dst->col1 );
+    vmathV3MakeZAxis( &dst->col2 );
+    vmathV3Copy( &dst->col3, translateVec );
+}
+
+/*
+ * Column-wise select: every column of result is produced by vmathV3Select
+ * from the matching columns of tfrm0 and tfrm1 with the same select1 flag.
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
+{
+    VmathTransform3 *dst = result;
+    const VmathTransform3 *first = tfrm0;
+    const VmathTransform3 *second = tfrm1;
+    vmathV3Select( &dst->col0, &first->col0, &second->col0, select1 );
+    vmathV3Select( &dst->col1, &first->col1, &second->col1, select1 );
+    vmathV3Select( &dst->col2, &first->col2, &second->col2, select1 );
+    vmathV3Select( &dst->col3, &first->col3, &second->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: fetch rows 0..2 of tfrm with vmathT3GetRow (each row is a
+ * 4-vector spanning the four columns) and print each with vmathV4Print.
+ */
+static inline void vmathT3Print( const VmathTransform3 *tfrm )
+{
+    VmathVector4 rowVec;
+    int row;
+    for ( row = 0; row < 3; row++ ) {
+        vmathT3GetRow( &rowVec, tfrm, row );
+        vmathV4Print( &rowVec );
+    }
+}
+
+/* Debug helper: print "<name>:" on its own line, then the transform rows. */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
+{
+    printf("%s:\n", name);
+    vmathT3Print( tfrm );
+}
+
+#endif
+
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
+{   /* Convert a 3x3 matrix (expected to be a rotation) to a quaternion
+     * without branching: four candidate quaternions are computed, one per
+     * case listed below, and the final answer is picked with spu_sel masks.
+     * NOTE(review): lane notes assume the _VECTORMATH_SHUF_* macros (defined
+     * outside this chunk) use X/Y/Z/W for first-operand words, A/B/C/D for
+     * second-operand words and 0 for zero - confirm. */
+    vec_float4 res;
+    vec_float4 col0, col1, col2;
+    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
+    vec_float4 radicand, invSqrt, scale;
+    vec_float4 res0, res1, res2, res3;
+    vec_float4 xx, yy, zz;
+    vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 ); /* all-ones in word 0 */
+    vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 ); /* all-ones in word 1 */
+    vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 ); /* all-ones in word 2 */
+    vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f ); /* all-ones in word 3 */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203); /* replicate word 0 */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607); /* replicate word 1 */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b); /* replicate word 2 */
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f); /* replicate word 3 */
+
+    col0 = tfrm->col0.vec128;
+    col1 = tfrm->col1.vec128;
+    col2 = tfrm->col2.vec128;
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    xx_yy = spu_sel( col0, col1, select_y ); /* word 0 from col0, word 1 from col1 */
+    xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
+    yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
+    zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
+
+    diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); /* presumably the trace in every word */
+    diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) ); /* words 0..2 from diagDiff, word 3 from diagSum, all plus 1 */
+    invSqrt = rsqrtf4( radicand );
+
+    zy_xz_yx = spu_sel( col0, col1, select_z ); /* word 2 from col1, rest from col0 */
+    zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
+    yz_zx_xy = spu_sel( col0, col1, select_x ); /* word 0 from col1, rest from col0 */
+    yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
+
+    sum = spu_add( zy_xz_yx, yz_zx_xy );
+    diff = spu_sub( zy_xz_yx, yz_zx_xy );
+
+    scale = spu_mul( invSqrt, spu_splats(0.5f) ); /* 0.5 / sqrt(radicand), one value per case */
+    res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
+    res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
+    res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
+    res3 = diff;
+    res0 = spu_sel( res0, radicand, select_x ); /* each candidate takes the radicand in its own slot */
+    res1 = spu_sel( res1, radicand, select_y );
+    res2 = spu_sel( res2, radicand, select_z );
+    res3 = spu_sel( res3, radicand, select_w );
+    res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) ); /* scale each candidate by its own case's factor */
+    res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
+    res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
+    res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
+
+    /* determine case and select answer */
+
+    xx = spu_shuffle( col0, col0, shuffle_xxxx );
+    yy = spu_shuffle( col1, col1, shuffle_yyyy );
+    zz = spu_shuffle( col2, col2, shuffle_zzzz );
+    res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) ); /* res1 when yy > xx, else res0 */
+    res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) ); /* res2 when zz exceeds both xx and yy */
+    res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) ); /* res3 overrides when word 0 of diagSum > 0 */
+    result->vec128 = res;
+}
+
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
+{ /* Outer product of two 3-vectors (tfrm0 * tfrm1^T), written to *result one column at a time. */
+    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) ); /* col0 <- tfrm0 scaled by tfrm1's X component */
+    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) ); /* col1 <- tfrm0 scaled by tfrm1's Y component */
+    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) ); /* col2 <- tfrm0 scaled by tfrm1's Z component */
+}
+
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
+{ /* Outer product of two 4-vectors (tfrm0 * tfrm1^T), written to *result one column at a time. */
+    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) ); /* col0 <- tfrm0 scaled by tfrm1's X component */
+    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) ); /* col1 <- tfrm0 scaled by tfrm1's Y component */
+    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) ); /* col2 <- tfrm0 scaled by tfrm1's Z component */
+    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) ); /* col3 <- tfrm0 scaled by tfrm1's W component */
+}
+
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{ /* Row-vector times matrix (vec * mat): transposes mat in registers, then sums its rows weighted by vec.x, vec.y, vec.z. */
+    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res; /* lane notes below write cN for mat->colN and assume the _VECTORMATH_SHUF_* names mean x,y,z,w = first operand, a,b,c,d = second - TODO confirm */
+    vec_float4 xxxx, yyyy, zzzz; /* vec.x / vec.y / vec.z replicated across all four lanes */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); /* byte indices 0-3 in every word: broadcast word 0 */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); /* byte indices 4-7 in every word: broadcast word 1 */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); /* byte indices 8-11 in every word: broadcast word 2 */
+    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB ); /* ( c0.x, c2.x, c0.y, c2.y ) */
+    tmp1 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD ); /* ( c0.z, c2.z, c0.w, c2.w ) */
+    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx ); /* splat vec.x; the splats are interleaved with the transpose shuffles */
+    mcol0 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_XAYB ); /* ( c0.x, c1.x, c2.x, c1.y ): row 0 of mat in xyz, w lane is a leftover */
+    mcol1 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_ZBW0 ); /* ( c0.y, c1.y, c2.y, 0 ): row 1 of mat */
+    mcol2 = spu_shuffle( tmp1, mat->col1.vec128, _VECTORMATH_SHUF_XCY0 ); /* ( c0.z, c1.z, c2.z, 0 ): row 2 of mat */
+    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy ); /* splat vec.y */
+    res = spu_mul( mcol0, xxxx ); /* row0 * vec.x */
+    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz ); /* splat vec.z */
+    res = spu_madd( mcol1, yyyy, res ); /* + row1 * vec.y */
+    res = spu_madd( mcol2, zzzz, res ); /* + row2 * vec.z; each xyz lane now holds dot( vec, corresponding column of mat ) */
+    result->vec128 = res; /* NOTE(review): w lane looks like c1.y * vec.x rather than 0; presumably ignored for a Vector3 - confirm */
+}
+
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
+{ /* Builds the skew-symmetric matrix of vec, so that result * v equals cross( vec, v ). */
+    vec_float4 neg, res0, res1, res2; /* lane notes assume the _VECTORMATH_SHUF_* names mean x,y,z,w = first operand, a,b,c,d = second, 0 = zero - TODO confirm */
+    neg = negatef4( vec->vec128 ); /* presumably the element-wise negation of vec (helper defined elsewhere) */
+    res0 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_0ZB0 ); /* col0 = (  0,  z, -y, 0 ) */
+    res1 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_C0X0 ); /* col1 = ( -z,  0,  x, 0 ) */
+    res2 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_YA00 ); /* col2 = (  y, -x,  0, 0 ) */
+    result->col0.vec128 = res0;
+    result->col1.vec128 = res1;
+    result->col2.vec128 = res2;
+}
+
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{ /* Crosses vec with each column of mat and assembles the three results as the columns of *result. */
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2; /* temporaries, so *result is only written after all of mat's columns have been read */
+    vmathV3Cross( &tmpV3_0, vec, &mat->col0 ); /* vec is always the left operand of the cross product */
+    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
+    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
+    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos_v.h
index 330dfda38..986612ebe 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos_v.h
@@ -1,1029 +1,1029 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_V_C_H
-#define _VECTORMATH_MAT_AOS_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
-#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
-#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
-#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
-#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
-{
-    vmathM3SetCol0(result, &_col0);
-}
-
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
-{
-    vmathM3SetCol1(result, &_col1);
-}
-
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
-{
-    vmathM3SetCol2(result, &_col2);
-}
-
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
-{
-    vmathM3SetCol(result, col, &vec);
-}
-
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
-{
-    vmathM3SetRow(result, row, &vec);
-}
-
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
-{
-    vmathM3SetElem(result, col, row, val);
-}
-
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
-{
-    return vmathM3GetElem(&mat, col, row);
-}
-
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
-{
-    VmathVector3 result;
-    vmathM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
-{
-    VmathVector3 result;
-    vmathM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM3Determinant_V( VmathMatrix3 mat )
-{
-    return vmathM3Determinant(&mat);
-}
-
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeIdentity_V( )
-{
-    VmathMatrix3 result;
-    vmathM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
-{
-    VmathMatrix3 result;
-    vmathM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print_V( VmathMatrix3 mat )
-{
-    vmathM3Print(&mat);
-}
-
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
-{
-    vmathM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
-{
-    vmathM4SetCol0(result, &_col0);
-}
-
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
-{
-    vmathM4SetCol1(result, &_col1);
-}
-
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
-{
-    vmathM4SetCol2(result, &_col2);
-}
-
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
-{
-    vmathM4SetCol3(result, &_col3);
-}
-
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
-{
-    vmathM4SetCol(result, col, &vec);
-}
-
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
-{
-    vmathM4SetRow(result, row, &vec);
-}
-
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
-{
-    vmathM4SetElem(result, col, row, val);
-}
-
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
-{
-    return vmathM4GetElem(&mat, col, row);
-}
-
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
-{
-    VmathVector4 result;
-    vmathM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
-{
-    VmathVector4 result;
-    vmathM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM4Determinant_V( VmathMatrix4 mat )
-{
-    return vmathM4Determinant(&mat);
-}
-
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeIdentity_V( )
-{
-    VmathMatrix4 result;
-    vmathM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
-{
-    vmathM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
-{
-    VmathMatrix3 result;
-    vmathM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
-{
-    vmathM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
-{
-    VmathVector3 result;
-    vmathM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
-{
-    VmathMatrix4 result;
-    vmathM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print_V( VmathMatrix4 mat )
-{
-    vmathM4Print(&mat);
-}
-
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
-{
-    vmathM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
-{
-    vmathT3SetCol0(result, &_col0);
-}
-
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
-{
-    vmathT3SetCol1(result, &_col1);
-}
-
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
-{
-    vmathT3SetCol2(result, &_col2);
-}
-
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
-{
-    vmathT3SetCol3(result, &_col3);
-}
-
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
-{
-    vmathT3SetCol(result, col, &vec);
-}
-
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
-{
-    vmathT3SetRow(result, row, &vec);
-}
-
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
-{
-    vmathT3SetElem(result, col, row, val);
-}
-
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
-{
-    return vmathT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
-{
-    VmathVector3 result;
-    vmathT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
-{
-    VmathVector4 result;
-    vmathT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeIdentity_V( )
-{
-    VmathTransform3 result;
-    vmathT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
-{
-    vmathT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
-{
-    VmathMatrix3 result;
-    vmathT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
-{
-    vmathT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
-{
-    VmathTransform3 result;
-    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print_V( VmathTransform3 tfrm )
-{
-    vmathT3Print(&tfrm);
-}
-
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
-{
-    vmathT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
-{
-    VmathQuat result;
-    vmathQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
-{
-    VmathMatrix3 result;
-    vmathV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_V_C_H
+#define _VECTORMATH_MAT_AOS_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
+#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
+#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
+#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
+#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
+{
+    vmathM3SetCol0(result, &_col0);
+}
+
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
+{
+    vmathM3SetCol1(result, &_col1);
+}
+
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
+{
+    vmathM3SetCol2(result, &_col2);
+}
+
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
+{
+    vmathM3SetCol(result, col, &vec);
+}
+
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
+{
+    vmathM3SetRow(result, row, &vec);
+}
+
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
+{
+    vmathM3SetElem(result, col, row, val);
+}
+
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
+{
+    return vmathM3GetElem(&mat, col, row);
+}
+
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
+{
+    VmathVector3 result;
+    vmathM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
+{
+    VmathVector3 result;
+    vmathM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM3Determinant_V( VmathMatrix3 mat )
+{
+    return vmathM3Determinant(&mat);
+}
+
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeIdentity_V( )
+{
+    VmathMatrix3 result;
+    vmathM3MakeIdentity(&result);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
+{
+    VmathMatrix3 result;
+    vmathM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM3Print_V( VmathMatrix3 mat )
+{
+    vmathM3Print(&mat);
+}
+
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
+{
+    vmathM3Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromT3(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
+{
+    vmathM4SetCol0(result, &_col0);
+}
+
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
+{
+    vmathM4SetCol1(result, &_col1);
+}
+
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
+{
+    vmathM4SetCol2(result, &_col2);
+}
+
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
+{
+    vmathM4SetCol3(result, &_col3);
+}
+
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
+{
+    vmathM4SetCol(result, col, &vec);
+}
+
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
+{
+    vmathM4SetRow(result, row, &vec);
+}
+
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
+{
+    vmathM4SetElem(result, col, row, val);
+}
+
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
+{
+    return vmathM4GetElem(&mat, col, row);
+}
+
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
+{
+    VmathVector4 result;
+    vmathM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
+{
+    VmathVector4 result;
+    vmathM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM4Determinant_V( VmathMatrix4 mat )
+{
+    return vmathM4Determinant(&mat);
+}
+
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathM4MulP3(&result, &mat, &pnt);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulT3(&result, &mat, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeIdentity_V( )
+{
+    VmathMatrix4 result;
+    vmathM4MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
+{
+    vmathM4SetUpper3x3(result, &mat3);
+}
+
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
+{
+    VmathMatrix3 result;
+    vmathM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
+{
+    vmathM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
+{
+    VmathVector3 result;
+    vmathM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
+{
+    VmathMatrix4 result;
+    vmathM4Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM4Print_V( VmathMatrix4 mat )
+{
+    vmathM4Print(&mat);
+}
+
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
+{
+    vmathM4Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
+{
+    vmathT3SetCol0(result, &_col0);
+}
+
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
+{
+    vmathT3SetCol1(result, &_col1);
+}
+
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
+{
+    vmathT3SetCol2(result, &_col2);
+}
+
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
+{
+    vmathT3SetCol3(result, &_col3);
+}
+
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
+{
+    vmathT3SetCol(result, col, &vec);
+}
+
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
+{
+    vmathT3SetRow(result, row, &vec);
+}
+
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
+{
+    vmathT3SetElem(result, col, row, val);
+}
+
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
+{
+    return vmathT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol0(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol1(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol2(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
+{
+    VmathVector3 result;
+    vmathT3GetCol(&result, &tfrm, col);
+    return result;
+}
+
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
+{
+    VmathVector4 result;
+    vmathT3GetRow(&result, &tfrm, row);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3Inverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3OrthoInverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3AbsPerElem(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathT3MulV3(&result, &tfrm, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathT3MulP3(&result, &tfrm, &pnt);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3Mul(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeIdentity_V( )
+{
+    VmathTransform3 result;
+    vmathT3MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
+{
+    vmathT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
+{
+    VmathMatrix3 result;
+    vmathT3GetUpper3x3(&result, &tfrm);
+    return result;
+}
+
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
+{
+    vmathT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetTranslation(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3AppendScale(&result, &tfrm, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3PrependScale(&result, &scaleVec, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
+{
+    VmathTransform3 result;
+    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathT3Print_V( VmathTransform3 tfrm )
+{
+    vmathT3Print(&tfrm);
+}
+
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
+{
+    vmathT3Prints(&tfrm, name);
+}
+
+#endif
+
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
+{
+    VmathQuat result;
+    vmathQMakeFromM3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
+{
+    VmathMatrix3 result;
+    vmathV3Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathV4Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathV3RowMul(&result, &vec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrix(&result, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrixMul(&result, &vec, &mat);
+    return result;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa.h
index 32234b7e9..c2c3734e0 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa.h
@@ -1,1493 +1,1493 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_C_H
-#define _VECTORMATH_MAT_SOA_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( &result->col0, &mat->col0 );
-    vmathSoaV3Copy( &result->col1, &mat->col1 );
-    vmathSoaV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar )
-{
-    vmathSoaV3MakeFromScalar( &result->col0, scalar );
-    vmathSoaV3MakeFromScalar( &result->col1, scalar );
-    vmathSoaV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
-{
-    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat->x;
-    qy = unitQuat->y;
-    qz = unitQuat->z;
-    qw = unitQuat->w;
-    qx2 = spu_add( qx, qx );
-    qy2 = spu_add( qy, qy );
-    qz2 = spu_add( qz, qz );
-    qxqx2 = spu_mul( qx, qx2 );
-    qxqy2 = spu_mul( qx, qy2 );
-    qxqz2 = spu_mul( qx, qz2 );
-    qxqw2 = spu_mul( qw, qx2 );
-    qyqy2 = spu_mul( qy, qy2 );
-    qyqz2 = spu_mul( qy, qz2 );
-    qyqw2 = spu_mul( qw, qy2 );
-    qzqz2 = spu_mul( qz, qz2 );
-    qzqw2 = spu_mul( qw, qz2 );
-    vmathSoaV3MakeFromElems( &result->col0, spu_sub( spu_sub( spu_splats(1.0f), qyqy2 ), qzqz2 ), spu_add( qxqy2, qzqw2 ), spu_sub( qxqz2, qyqw2 ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_sub( qxqy2, qzqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qzqz2 ), spu_add( qyqz2, qxqw2 ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_add( qxqz2, qyqw2 ), spu_sub( qyqz2, qxqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qyqy2 ) );
-}
-
-static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-    vmathSoaV3Copy( &result->col1, _col1 );
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathSoaV3MakeFromAos( &result->col0, &mat->col0 );
-    vmathSoaV3MakeFromAos( &result->col1, &mat->col1 );
-    vmathSoaV3MakeFromAos( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 )
-{
-    vmathSoaV3MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
-    vmathSoaV3MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
-    vmathSoaV3MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
-}
-
-static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
-{
-    vmathSoaV3Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV3Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV3Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-}
-
-static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col1 )
-{
-    vmathSoaV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3SetElem( &result->col0, row, vmathSoaV3GetElem( vec, 0 ) );
-    vmathSoaV3SetElem( &result->col1, row, vmathSoaV3GetElem( vec, 1 ) );
-    vmathSoaV3SetElem( &result->col2, row, vmathSoaV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3GetCol( &tmpV3_0, result, col );
-    vmathSoaV3SetElem( &tmpV3_0, row, val );
-    vmathSoaM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3GetCol( &tmpV3_0, mat, col );
-    return vmathSoaV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col )
-{
-    vmathSoaV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row )
-{
-    vmathSoaV3MakeFromElems( result, vmathSoaV3GetElem( &mat->col0, row ), vmathSoaV3GetElem( &mat->col1, row ), vmathSoaV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaMatrix3 tmpResult;
-    vmathSoaV3MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x );
-    vmathSoaV3MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y );
-    vmathSoaV3MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z );
-    vmathSoaM3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmp0, tmp1, tmp2;
-    vec_float4 detinv;
-    vmathSoaV3Cross( &tmp0, &mat->col1, &mat->col2 );
-    vmathSoaV3Cross( &tmp1, &mat->col2, &mat->col0 );
-    vmathSoaV3Cross( &tmp2, &mat->col0, &mat->col1 );
-    detinv = recipf4( vmathSoaV3Dot( &mat->col2, &tmp2 ) );
-    vmathSoaV3MakeFromElems( &result->col0, spu_mul( tmp0.x, detinv ), spu_mul( tmp1.x, detinv ), spu_mul( tmp2.x, detinv ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_mul( tmp0.y, detinv ), spu_mul( tmp1.y, detinv ), spu_mul( tmp2.y, detinv ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_mul( tmp0.z, detinv ), spu_mul( tmp1.z, detinv ), spu_mul( tmp2.z, detinv ) );
-}
-
-static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathSoaV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Neg( &result->col0, &mat->col0 );
-    vmathSoaV3Neg( &result->col1, &mat->col1 );
-    vmathSoaV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathSoaV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathSoaV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar )
-{
-    vmathSoaV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathSoaV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathSoaV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) );
-    tmpY = spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) );
-    tmpZ = spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    VmathSoaMatrix3 tmpResult;
-    vmathSoaM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathSoaM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathSoaM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathSoaM3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), c, s );
-    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( s ), c );
-}
-
-static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, spu_splats(0.0f), negatef4( s ) );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeFromElems( &result->col2, s, spu_splats(0.0f), c );
-}
-
-static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, s, spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, spu_splats(0.0f) );
-    vmathSoaV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    vmathSoaV3MakeFromElems( &result->col0, spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) );
-}
-
-static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = spu_mul( x, y );
-    yz = spu_mul( y, z );
-    zx = spu_mul( z, x );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    vmathSoaV3MakeFromElems( &result->col0, spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ) );
-}
-
-static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
-{
-    vmathSoaM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z );
-}
-
-static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
-}
-
-static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathSoaV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathSoaV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 )
-{
-    vmathSoaV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathSoaV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathSoaV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat )
-{
-    VmathMatrix3 mat0, mat1, mat2, mat3;
-    vmathSoaM3Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathM3Print( &mat0 );
-    printf("slot 1:\n");
-    vmathM3Print( &mat1 );
-    printf("slot 2:\n");
-    vmathM3Print( &mat2 );
-    printf("slot 3:\n");
-    vmathM3Print( &mat3 );
-}
-
-static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaM3Print( mat );
-}
-
-#endif
-
-static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( &result->col0, &mat->col0 );
-    vmathSoaV4Copy( &result->col1, &mat->col1 );
-    vmathSoaV4Copy( &result->col2, &mat->col2 );
-    vmathSoaV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar )
-{
-    vmathSoaV4MakeFromScalar( &result->col0, scalar );
-    vmathSoaV4MakeFromScalar( &result->col1, scalar );
-    vmathSoaV4MakeFromScalar( &result->col2, scalar );
-    vmathSoaV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat )
-{
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, &mat->col3, spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0, const VmathSoaVector4 *_col1, const VmathSoaVector4 *_col2, const VmathSoaVector4 *_col3 )
-{
-    vmathSoaV4Copy( &result->col0, _col0 );
-    vmathSoaV4Copy( &result->col1, _col1 );
-    vmathSoaV4Copy( &result->col2, _col2 );
-    vmathSoaV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
-{
-    VmathSoaMatrix3 mat;
-    vmathSoaM3MakeFromQ( &mat, unitQuat );
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat.col0, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat.col1, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat.col2, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathSoaV4MakeFromAos( &result->col0, &mat->col0 );
-    vmathSoaV4MakeFromAos( &result->col1, &mat->col1 );
-    vmathSoaV4MakeFromAos( &result->col2, &mat->col2 );
-    vmathSoaV4MakeFromAos( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 )
-{
-    vmathSoaV4MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
-    vmathSoaV4MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
-    vmathSoaV4MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
-    vmathSoaV4MakeFrom4Aos( &result->col3, &mat0->col3, &mat1->col3, &mat2->col3, &mat3->col3 );
-}
-
-static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
-{
-    vmathSoaV4Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV4Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV4Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-    vmathSoaV4Get4Aos( &mat->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
-}
-
-static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0 )
-{
-    vmathSoaV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col1 )
-{
-    vmathSoaV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col2 )
-{
-    vmathSoaV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col3 )
-{
-    vmathSoaV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec )
-{
-    vmathSoaV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec )
-{
-    vmathSoaV4SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
-    vmathSoaV4SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
-    vmathSoaV4SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
-    vmathSoaV4SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector4 tmpV3_0;
-    vmathSoaM4GetCol( &tmpV3_0, result, col );
-    vmathSoaV4SetElem( &tmpV3_0, row, val );
-    vmathSoaM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row )
-{
-    VmathSoaVector4 tmpV4_0;
-    vmathSoaM4GetCol( &tmpV4_0, mat, col );
-    return vmathSoaV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col )
-{
-    vmathSoaV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row )
-{
-    vmathSoaV4MakeFromElems( result, vmathSoaV4GetElem( &mat->col0, row ), vmathSoaV4GetElem( &mat->col1, row ), vmathSoaV4GetElem( &mat->col2, row ), vmathSoaV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaMatrix4 tmpResult;
-    vmathSoaV4MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
-    vmathSoaV4MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
-    vmathSoaV4MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
-    vmathSoaV4MakeFromElems( &tmpResult.col3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaVector4 res0, res1, res2, res3;
-    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
-    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
-    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
-    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
-    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
-    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
-    vmathSoaV4SetX( &res0, spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) ) );
-    vmathSoaV4SetY( &res0, spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) ) );
-    vmathSoaV4SetZ( &res0, spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) ) );
-    vmathSoaV4SetW( &res0, spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) ) );
-    detInv = recipf4( spu_add( spu_add( spu_add( spu_mul( mA, res0.x ), spu_mul( mE, res0.y ) ), spu_mul( mI, res0.z ) ), spu_mul( mM, res0.w ) ) );
-    vmathSoaV4SetX( &res1, spu_mul( mI, tmp1 ) );
-    vmathSoaV4SetY( &res1, spu_mul( mM, tmp0 ) );
-    vmathSoaV4SetZ( &res1, spu_mul( mA, tmp1 ) );
-    vmathSoaV4SetW( &res1, spu_mul( mE, tmp0 ) );
-    vmathSoaV4SetX( &res3, spu_mul( mI, tmp3 ) );
-    vmathSoaV4SetY( &res3, spu_mul( mM, tmp2 ) );
-    vmathSoaV4SetZ( &res3, spu_mul( mA, tmp3 ) );
-    vmathSoaV4SetW( &res3, spu_mul( mE, tmp2 ) );
-    vmathSoaV4SetX( &res2, spu_mul( mI, tmp5 ) );
-    vmathSoaV4SetY( &res2, spu_mul( mM, tmp4 ) );
-    vmathSoaV4SetZ( &res2, spu_mul( mA, tmp5 ) );
-    vmathSoaV4SetW( &res2, spu_mul( mE, tmp4 ) );
-    tmp0 = spu_sub( spu_mul( mI, mB ), spu_mul( mA, mJ ) );
-    tmp1 = spu_sub( spu_mul( mM, mF ), spu_mul( mE, mN ) );
-    tmp2 = spu_sub( spu_mul( mI, mD ), spu_mul( mA, mL ) );
-    tmp3 = spu_sub( spu_mul( mM, mH ), spu_mul( mE, mP ) );
-    tmp4 = spu_sub( spu_mul( mI, mC ), spu_mul( mA, mK ) );
-    tmp5 = spu_sub( spu_mul( mM, mG ), spu_mul( mE, mO ) );
-    vmathSoaV4SetX( &res2, spu_add( spu_sub( spu_mul( mL, tmp1 ), spu_mul( mJ, tmp3 ) ), res2.x ) );
-    vmathSoaV4SetY( &res2, spu_add( spu_sub( spu_mul( mP, tmp0 ), spu_mul( mN, tmp2 ) ), res2.y ) );
-    vmathSoaV4SetZ( &res2, spu_sub( spu_sub( spu_mul( mB, tmp3 ), spu_mul( mD, tmp1 ) ), res2.z ) );
-    vmathSoaV4SetW( &res2, spu_sub( spu_sub( spu_mul( mF, tmp2 ), spu_mul( mH, tmp0 ) ), res2.w ) );
-    vmathSoaV4SetX( &res3, spu_add( spu_sub( spu_mul( mJ, tmp5 ), spu_mul( mK, tmp1 ) ), res3.x ) );
-    vmathSoaV4SetY( &res3, spu_add( spu_sub( spu_mul( mN, tmp4 ), spu_mul( mO, tmp0 ) ), res3.y ) );
-    vmathSoaV4SetZ( &res3, spu_sub( spu_sub( spu_mul( mC, tmp1 ), spu_mul( mB, tmp5 ) ), res3.z ) );
-    vmathSoaV4SetW( &res3, spu_sub( spu_sub( spu_mul( mG, tmp0 ), spu_mul( mF, tmp4 ) ), res3.w ) );
-    vmathSoaV4SetX( &res1, spu_sub( spu_sub( spu_mul( mK, tmp3 ), spu_mul( mL, tmp5 ) ), res1.x ) );
-    vmathSoaV4SetY( &res1, spu_sub( spu_sub( spu_mul( mO, tmp2 ), spu_mul( mP, tmp4 ) ), res1.y ) );
-    vmathSoaV4SetZ( &res1, spu_add( spu_sub( spu_mul( mD, tmp5 ), spu_mul( mC, tmp3 ) ), res1.z ) );
-    vmathSoaV4SetW( &res1, spu_add( spu_sub( spu_mul( mH, tmp4 ), spu_mul( mG, tmp2 ) ), res1.w ) );
-    vmathSoaV4ScalarMul( &result->col0, &res0, detInv );
-    vmathSoaV4ScalarMul( &result->col1, &res1, detInv );
-    vmathSoaV4ScalarMul( &result->col2, &res2, detInv );
-    vmathSoaV4ScalarMul( &result->col3, &res3, detInv );
-}
-
-static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaTransform3 affineMat, tmpT3_0;
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathSoaV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathSoaT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathSoaV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathSoaT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathSoaV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathSoaT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathSoaV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathSoaT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathSoaT3Inverse( &tmpT3_0, &affineMat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaTransform3 affineMat, tmpT3_0;
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathSoaV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathSoaT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathSoaV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathSoaT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathSoaV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathSoaT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathSoaV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathSoaT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathSoaT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat )
-{
-    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
-    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
-    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
-    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
-    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
-    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
-    dx = spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) );
-    dy = spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) );
-    dz = spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) );
-    dw = spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) );
-    return spu_add( spu_add( spu_add( spu_mul( mA, dx ), spu_mul( mE, dy ) ), spu_mul( mI, dz ) ), spu_mul( mM, dw ) );
-}
-
-static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Neg( &result->col0, &mat->col0 );
-    vmathSoaV4Neg( &result->col1, &mat->col1 );
-    vmathSoaV4Neg( &result->col2, &mat->col2 );
-    vmathSoaV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathSoaV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathSoaV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathSoaV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar )
-{
-    vmathSoaV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathSoaV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathSoaV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathSoaV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = spu_add( spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) ), spu_mul( mat->col3.x, vec->w ) );
-    tmpY = spu_add( spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) ), spu_mul( mat->col3.y, vec->w ) );
-    tmpZ = spu_add( spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) ), spu_mul( mat->col3.z, vec->w ) );
-    tmpW = spu_add( spu_add( spu_add( spu_mul( mat->col0.w, vec->x ), spu_mul( mat->col1.w, vec->y ) ), spu_mul( mat->col2.w, vec->z ) ), spu_mul( mat->col3.w, vec->w ) );
-    vmathSoaV4MakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec )
-{
-    result->x = spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) );
-    result->y = spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) );
-    result->z = spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) );
-    result->w = spu_add( spu_add( spu_mul( mat->col0.w, vec->x ), spu_mul( mat->col1.w, vec->y ) ), spu_mul( mat->col2.w, vec->z ) );
-}
-
-static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt )
-{
-    result->x = spu_add( spu_add( spu_add( spu_mul( mat->col0.x, pnt->x ), spu_mul( mat->col1.x, pnt->y ) ), spu_mul( mat->col2.x, pnt->z ) ), mat->col3.x );
-    result->y = spu_add( spu_add( spu_add( spu_mul( mat->col0.y, pnt->x ), spu_mul( mat->col1.y, pnt->y ) ), spu_mul( mat->col2.y, pnt->z ) ), mat->col3.y );
-    result->z = spu_add( spu_add( spu_add( spu_mul( mat->col0.z, pnt->x ), spu_mul( mat->col1.z, pnt->y ) ), spu_mul( mat->col2.z, pnt->z ) ), mat->col3.z );
-    result->w = spu_add( spu_add( spu_add( spu_mul( mat->col0.w, pnt->x ), spu_mul( mat->col1.w, pnt->y ) ), spu_mul( mat->col2.w, pnt->z ) ), mat->col3.w );
-}
-
-static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    VmathSoaMatrix4 tmpResult;
-    vmathSoaM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathSoaM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathSoaM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathSoaM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm1 )
-{
-    VmathSoaMatrix4 tmpResult;
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathSoaM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathSoaM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathSoaM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result )
-{
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 )
-{
-    vmathSoaV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathSoaV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathSoaV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4GetXYZ( &result->col0, &mat->col0 );
-    vmathSoaV4GetXYZ( &result->col1, &mat->col1 );
-    vmathSoaV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), c, s, spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( s ), c, spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeFromElems( &result->col0, c, spu_splats(0.0f), negatef4( s ), spu_splats(0.0f) );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeFromElems( &result->col2, s, spu_splats(0.0f), c, spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeFromElems( &result->col0, c, s, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, negatef4( s ), c, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    vmathSoaV4MakeFromElems( &result->col0, spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ), spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = spu_mul( x, y );
-    yz = spu_mul( y, z );
-    zx = spu_mul( z, x );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    vmathSoaV4MakeFromElems( &result->col0, spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ), spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat )
-{
-    VmathSoaTransform3 tmpT3_0;
-    vmathSoaT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV4MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z, spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV4ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV4ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV4ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
-    vmathSoaV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaVector4 scale4;
-    vmathSoaV4MakeFromV3Scalar( &scale4, scaleVec, spu_splats(1.0f) );
-    vmathSoaV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathSoaV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathSoaV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathSoaV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec )
-{
-    VmathSoaMatrix4 m4EyeFrame;
-    VmathSoaVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathSoaVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathSoaV3Normalize( &v3Y, upVec );
-    vmathSoaP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathSoaV3Normalize( &v3Z, &tmpV3_0 );
-    vmathSoaV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathSoaV3Normalize( &v3X, &tmpV3_1 );
-    vmathSoaV3Cross( &v3Y, &v3Z, &v3X );
-    vmathSoaV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathSoaV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathSoaV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathSoaV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathSoaM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathSoaM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 f, rangeInv;
-    f = tanf4( spu_sub( spu_splats( _VECTORMATH_PI_OVER_2 ), spu_mul( spu_splats(0.5f), fovyRadians ) ) );
-    rangeInv = recipf4( spu_sub( zNear, zFar ) );
-    vmathSoaV4MakeFromElems( &result->col0, divf4( f, aspect ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), f, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_add( zNear, zFar ), rangeInv ), spu_splats(-1.0f) );
-    vmathSoaV4MakeFromElems( &result->col3, spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( spu_mul( zNear, zFar ), rangeInv ), spu_splats(2.0f) ), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = spu_add( right, left );
-    sum_tb = spu_add( top, bottom );
-    sum_nf = spu_add( zNear, zFar );
-    inv_rl = recipf4( spu_sub( right, left ) );
-    inv_tb = recipf4( spu_sub( top, bottom ) );
-    inv_nf = recipf4( spu_sub( zNear, zFar ) );
-    n2 = spu_add( zNear, zNear );
-    vmathSoaV4MakeFromElems( &result->col0, spu_mul( n2, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), spu_mul( n2, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_mul( sum_rl, inv_rl ), spu_mul( sum_tb, inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(-1.0f) );
-    vmathSoaV4MakeFromElems( &result->col3, spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( n2, inv_nf ), zFar ), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = spu_add( right, left );
-    sum_tb = spu_add( top, bottom );
-    sum_nf = spu_add( zNear, zFar );
-    inv_rl = recipf4( spu_sub( right, left ) );
-    inv_tb = recipf4( spu_sub( top, bottom ) );
-    inv_nf = recipf4( spu_sub( zNear, zFar ) );
-    vmathSoaV4MakeFromElems( &result->col0, spu_add( inv_rl, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), spu_add( inv_tb, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), spu_add( inv_nf, inv_nf ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col3, spu_mul( negatef4( sum_rl ), inv_rl ), spu_mul( negatef4( sum_tb ), inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 )
-{
-    vmathSoaV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathSoaV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathSoaV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathSoaV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat )
-{
-    VmathMatrix4 mat0, mat1, mat2, mat3;
-    vmathSoaM4Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathM4Print( &mat0 );
-    printf("slot 1:\n");
-    vmathM4Print( &mat1 );
-    printf("slot 2:\n");
-    vmathM4Print( &mat2 );
-    printf("slot 3:\n");
-    vmathM4Print( &mat3 );
-}
-
-static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaM4Print( mat );
-}
-
-#endif
-
-static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
-    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
-    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
-    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar )
-{
-    vmathSoaV3MakeFromScalar( &result->col0, scalar );
-    vmathSoaV3MakeFromScalar( &result->col1, scalar );
-    vmathSoaV3MakeFromScalar( &result->col2, scalar );
-    vmathSoaV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2, const VmathSoaVector3 *_col3 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-    vmathSoaV3Copy( &result->col1, _col1 );
-    vmathSoaV3Copy( &result->col2, _col2 );
-    vmathSoaV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaT3SetUpper3x3( result, tfrm );
-    vmathSoaT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathSoaT3SetUpper3x3( result, &tmpM3_0 );
-    vmathSoaT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathSoaV3MakeFromAos( &result->col0, &tfrm->col0 );
-    vmathSoaV3MakeFromAos( &result->col1, &tfrm->col1 );
-    vmathSoaV3MakeFromAos( &result->col2, &tfrm->col2 );
-    vmathSoaV3MakeFromAos( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 )
-{
-    vmathSoaV3MakeFrom4Aos( &result->col0, &tfrm0->col0, &tfrm1->col0, &tfrm2->col0, &tfrm3->col0 );
-    vmathSoaV3MakeFrom4Aos( &result->col1, &tfrm0->col1, &tfrm1->col1, &tfrm2->col1, &tfrm3->col1 );
-    vmathSoaV3MakeFrom4Aos( &result->col2, &tfrm0->col2, &tfrm1->col2, &tfrm2->col2, &tfrm3->col2 );
-    vmathSoaV3MakeFrom4Aos( &result->col3, &tfrm0->col3, &tfrm1->col3, &tfrm2->col3, &tfrm3->col3 );
-}
-
-static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
-{
-    vmathSoaV3Get4Aos( &tfrm->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV3Get4Aos( &tfrm->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV3Get4Aos( &tfrm->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-    vmathSoaV3Get4Aos( &tfrm->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
-}
-
-static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *_col1 )
-{
-    vmathSoaV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *_col3 )
-{
-    vmathSoaV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec )
-{
-    vmathSoaV3SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
-    vmathSoaV3SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
-    vmathSoaV3SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
-    vmathSoaV3SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaT3GetCol( &tmpV3_0, result, col );
-    vmathSoaV3SetElem( &tmpV3_0, row, val );
-    vmathSoaT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathSoaV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col )
-{
-    vmathSoaV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row )
-{
-    vmathSoaV4MakeFromElems( result, vmathSoaV3GetElem( &tfrm->col0, row ), vmathSoaV3GetElem( &tfrm->col1, row ), vmathSoaV3GetElem( &tfrm->col2, row ), vmathSoaV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    VmathSoaVector3 tmp0, tmp1, tmp2, inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vec_float4 detinv;
-    vmathSoaV3Cross( &tmp0, &tfrm->col1, &tfrm->col2 );
-    vmathSoaV3Cross( &tmp1, &tfrm->col2, &tfrm->col0 );
-    vmathSoaV3Cross( &tmp2, &tfrm->col0, &tfrm->col1 );
-    detinv = recipf4( vmathSoaV3Dot( &tfrm->col2, &tmp2 ) );
-    vmathSoaV3MakeFromElems( &inv0, spu_mul( tmp0.x, detinv ), spu_mul( tmp1.x, detinv ), spu_mul( tmp2.x, detinv ) );
-    vmathSoaV3MakeFromElems( &inv1, spu_mul( tmp0.y, detinv ), spu_mul( tmp1.y, detinv ), spu_mul( tmp2.y, detinv ) );
-    vmathSoaV3MakeFromElems( &inv2, spu_mul( tmp0.z, detinv ), spu_mul( tmp1.z, detinv ), spu_mul( tmp2.z, detinv ) );
-    vmathSoaV3Copy( &result->col0, &inv0 );
-    vmathSoaV3Copy( &result->col1, &inv1 );
-    vmathSoaV3Copy( &result->col2, &inv2 );
-    vmathSoaV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathSoaV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathSoaV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathSoaV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathSoaV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathSoaV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathSoaV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    VmathSoaVector3 inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vmathSoaV3MakeFromElems( &inv0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
-    vmathSoaV3MakeFromElems( &inv1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
-    vmathSoaV3MakeFromElems( &inv2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
-    vmathSoaV3Copy( &result->col0, &inv0 );
-    vmathSoaV3Copy( &result->col1, &inv1 );
-    vmathSoaV3Copy( &result->col2, &inv2 );
-    vmathSoaV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathSoaV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathSoaV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathSoaV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathSoaV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathSoaV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathSoaV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathSoaV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathSoaV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathSoaV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_add( spu_add( spu_mul( tfrm->col0.x, vec->x ), spu_mul( tfrm->col1.x, vec->y ) ), spu_mul( tfrm->col2.x, vec->z ) );
-    tmpY = spu_add( spu_add( spu_mul( tfrm->col0.y, vec->x ), spu_mul( tfrm->col1.y, vec->y ) ), spu_mul( tfrm->col2.y, vec->z ) );
-    tmpZ = spu_add( spu_add( spu_mul( tfrm->col0.z, vec->x ), spu_mul( tfrm->col1.z, vec->y ) ), spu_mul( tfrm->col2.z, vec->z ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.x, pnt->x ), spu_mul( tfrm->col1.x, pnt->y ) ), spu_mul( tfrm->col2.x, pnt->z ) ), tfrm->col3.x );
-    tmpY = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.y, pnt->x ), spu_mul( tfrm->col1.y, pnt->y ) ), spu_mul( tfrm->col2.y, pnt->z ) ), tfrm->col3.y );
-    tmpZ = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.z, pnt->x ), spu_mul( tfrm->col1.z, pnt->y ) ), spu_mul( tfrm->col2.z, pnt->z ) ), tfrm->col3.z );
-    vmathSoaP3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
-{
-    VmathSoaTransform3 tmpResult;
-    VmathSoaPoint3 tmpP3_0, tmpP3_1;
-    vmathSoaT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathSoaT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathSoaT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathSoaT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathSoaV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathSoaT3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
-{
-    vmathSoaV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathSoaV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathSoaV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathSoaV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm )
-{
-    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
-    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
-    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), c, s );
-    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( s ), c );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, spu_splats(0.0f), negatef4( s ) );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeFromElems( &result->col2, s, spu_splats(0.0f), c );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, s, spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, spu_splats(0.0f) );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    vmathSoaV3MakeFromElems( &result->col0, spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathSoaV3MakeFromScalar( &tmpV3_0, spu_splats(0.0f) );
-    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathSoaV3MakeFromScalar( &tmpV3_0, spu_splats(0.0f) );
-    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3ScalarMul( &result->col0, &tfrm->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col1, &tfrm->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col2, &tfrm->col2, vmathSoaV3GetZ( scaleVec ) );
-    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathSoaV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathSoaV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathSoaV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 )
-{
-    vmathSoaV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathSoaV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathSoaV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathSoaV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm )
-{
-    VmathTransform3 mat0, mat1, mat2, mat3;
-    vmathSoaT3Get4Aos( tfrm, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathT3Print( &mat0 );
-    printf("slot 1:\n");
-    vmathT3Print( &mat1 );
-    printf("slot 2:\n");
-    vmathT3Print( &mat2 );
-    printf("slot 3:\n");
-    vmathT3Print( &mat3 );
-}
-
-static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *tfrm )
-{
-    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
-    vec_uint4 largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm->col0.x;
-    yx = tfrm->col0.y;
-    zx = tfrm->col0.z;
-    xy = tfrm->col1.x;
-    yy = tfrm->col1.y;
-    zy = tfrm->col1.z;
-    xz = tfrm->col2.x;
-    yz = tfrm->col2.y;
-    zz = tfrm->col2.z;
-
-    trace = spu_add( spu_add( xx, yy ), zz );
-
-    negTrace = spu_cmpgt( spu_splats(0.0f), trace );
-    ZgtX = spu_cmpgt( zz, xx );
-    ZgtY = spu_cmpgt( zz, yy );
-    YgtX = spu_cmpgt( yy, xx );
-    largestXorY = spu_and( negTrace, spu_nand( ZgtX, ZgtY ) );
-    largestYorZ = spu_and( negTrace, spu_or( YgtX, ZgtX ) );
-    largestZorX = spu_and( negTrace, spu_orc( ZgtY, YgtX ) );
-    
-    zz = spu_sel( zz, negatef4(zz), largestXorY );
-    xy = spu_sel( xy, negatef4(xy), largestXorY );
-    xx = spu_sel( xx, negatef4(xx), largestYorZ );
-    yz = spu_sel( yz, negatef4(yz), largestYorZ );
-    yy = spu_sel( yy, negatef4(yy), largestZorX );
-    zx = spu_sel( zx, negatef4(zx), largestZorX );
-
-    radicand = spu_add( spu_add( spu_add( xx, yy ), zz ), spu_splats(1.0f) );
-    scale = spu_mul( spu_splats(0.5f), rsqrtf4( radicand ) );
-
-    tmpx = spu_mul( spu_sub( zy, yz ), scale );
-    tmpy = spu_mul( spu_sub( xz, zx ), scale );
-    tmpz = spu_mul( spu_sub( yx, xy ), scale );
-    tmpw = spu_mul( radicand, scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    qx = spu_sel( qx, tmpw, largestXorY );
-    qy = spu_sel( qy, tmpz, largestXorY );
-    qz = spu_sel( qz, tmpy, largestXorY );
-    qw = spu_sel( qw, tmpx, largestXorY );
-    tmpx = qx;
-    tmpz = qz;
-    qx = spu_sel( qx, qy, largestYorZ );
-    qy = spu_sel( qy, tmpx, largestYorZ );
-    qz = spu_sel( qz, qw, largestYorZ );
-    qw = spu_sel( qw, tmpz, largestYorZ );
-
-    result->x = qx;
-    result->y = qy;
-    result->z = qz;
-    result->w = qw;
-}
-
-static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *tfrm0, const VmathSoaVector3 *tfrm1 )
-{
-    vmathSoaV3ScalarMul( &result->col0, tfrm0, vmathSoaV3GetX( tfrm1 ) );
-    vmathSoaV3ScalarMul( &result->col1, tfrm0, vmathSoaV3GetY( tfrm1 ) );
-    vmathSoaV3ScalarMul( &result->col2, tfrm0, vmathSoaV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *tfrm0, const VmathSoaVector4 *tfrm1 )
-{
-    vmathSoaV4ScalarMul( &result->col0, tfrm0, vmathSoaV4GetX( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col1, tfrm0, vmathSoaV4GetY( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col2, tfrm0, vmathSoaV4GetZ( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col3, tfrm0, vmathSoaV4GetW( tfrm1 ) );
-}
-
-static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_add( spu_add( spu_mul( vec->x, mat->col0.x ), spu_mul( vec->y, mat->col0.y ) ), spu_mul( vec->z, mat->col0.z ) );
-    tmpY = spu_add( spu_add( spu_mul( vec->x, mat->col1.x ), spu_mul( vec->y, mat->col1.y ) ), spu_mul( vec->z, mat->col1.z ) );
-    tmpZ = spu_add( spu_add( spu_mul( vec->x, mat->col2.x ), spu_mul( vec->y, mat->col2.y ) ), spu_mul( vec->z, mat->col2.z ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, spu_splats(0.0f), vec->z, negatef4( vec->y ) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( vec->z ), spu_splats(0.0f), vec->x );
-    vmathSoaV3MakeFromElems( &result->col2, vec->y, negatef4( vec->x ), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathSoaV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathSoaV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathSoaV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathSoaM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_C_H
+#define _VECTORMATH_MAT_SOA_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Copy( &result->col0, &mat->col0 );
+    vmathSoaV3Copy( &result->col1, &mat->col1 );
+    vmathSoaV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar )
+{
+    vmathSoaV3MakeFromScalar( &result->col0, scalar );
+    vmathSoaV3MakeFromScalar( &result->col1, scalar );
+    vmathSoaV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
+{
+    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat->x;
+    qy = unitQuat->y;
+    qz = unitQuat->z;
+    qw = unitQuat->w;
+    qx2 = spu_add( qx, qx );
+    qy2 = spu_add( qy, qy );
+    qz2 = spu_add( qz, qz );
+    qxqx2 = spu_mul( qx, qx2 );
+    qxqy2 = spu_mul( qx, qy2 );
+    qxqz2 = spu_mul( qx, qz2 );
+    qxqw2 = spu_mul( qw, qx2 );
+    qyqy2 = spu_mul( qy, qy2 );
+    qyqz2 = spu_mul( qy, qz2 );
+    qyqw2 = spu_mul( qw, qy2 );
+    qzqz2 = spu_mul( qz, qz2 );
+    qzqw2 = spu_mul( qw, qz2 );
+    vmathSoaV3MakeFromElems( &result->col0, spu_sub( spu_sub( spu_splats(1.0f), qyqy2 ), qzqz2 ), spu_add( qxqy2, qzqw2 ), spu_sub( qxqz2, qyqw2 ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_sub( qxqy2, qzqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qzqz2 ), spu_add( qyqz2, qxqw2 ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_add( qxqz2, qyqw2 ), spu_sub( qyqz2, qxqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qyqy2 ) );
+}
+
+static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2 )
+{
+    vmathSoaV3Copy( &result->col0, _col0 );
+    vmathSoaV3Copy( &result->col1, _col1 );
+    vmathSoaV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vmathSoaV3MakeFromAos( &result->col0, &mat->col0 );
+    vmathSoaV3MakeFromAos( &result->col1, &mat->col1 );
+    vmathSoaV3MakeFromAos( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 )
+{
+    vmathSoaV3MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
+    vmathSoaV3MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
+    vmathSoaV3MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
+}
+
+static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
+{
+    vmathSoaV3Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV3Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV3Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+}
+
+static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0 )
+{
+    vmathSoaV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col1 )
+{
+    vmathSoaV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col2 )
+{
+    vmathSoaV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3Copy( (&result->col0 + col), vec );
+}
+
+static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3SetElem( &result->col0, row, vmathSoaV3GetElem( vec, 0 ) );
+    vmathSoaV3SetElem( &result->col1, row, vmathSoaV3GetElem( vec, 1 ) );
+    vmathSoaV3SetElem( &result->col2, row, vmathSoaV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
+{
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaM3GetCol( &tmpV3_0, result, col );
+    vmathSoaV3SetElem( &tmpV3_0, row, val );
+    vmathSoaM3SetCol( result, col, &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row )
+{
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaM3GetCol( &tmpV3_0, mat, col );
+    return vmathSoaV3GetElem( &tmpV3_0, row );
+}
+
+static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col )
+{
+    vmathSoaV3Copy( result, (&mat->col0 + col) );
+}
+
+static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row )
+{
+    vmathSoaV3MakeFromElems( result, vmathSoaV3GetElem( &mat->col0, row ), vmathSoaV3GetElem( &mat->col1, row ), vmathSoaV3GetElem( &mat->col2, row ) );
+}
+
+static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaMatrix3 tmpResult;
+    vmathSoaV3MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x );
+    vmathSoaV3MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y );
+    vmathSoaV3MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z );
+    vmathSoaM3Copy( result, &tmpResult );
+}
+
+static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 tmp0, tmp1, tmp2;
+    vec_float4 detinv;
+    vmathSoaV3Cross( &tmp0, &mat->col1, &mat->col2 );
+    vmathSoaV3Cross( &tmp1, &mat->col2, &mat->col0 );
+    vmathSoaV3Cross( &tmp2, &mat->col0, &mat->col1 );
+    detinv = recipf4( vmathSoaV3Dot( &mat->col2, &tmp2 ) );
+    vmathSoaV3MakeFromElems( &result->col0, spu_mul( tmp0.x, detinv ), spu_mul( tmp1.x, detinv ), spu_mul( tmp2.x, detinv ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_mul( tmp0.y, detinv ), spu_mul( tmp1.y, detinv ), spu_mul( tmp2.y, detinv ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_mul( tmp0.z, detinv ), spu_mul( tmp1.z, detinv ), spu_mul( tmp2.z, detinv ) );
+}
+
+static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
+    return vmathSoaV3Dot( &mat->col2, &tmpV3_0 );
+}
+
+static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    vmathSoaV3Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathSoaV3Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV3Add( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    vmathSoaV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathSoaV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3Neg( &result->col0, &mat->col0 );
+    vmathSoaV3Neg( &result->col1, &mat->col1 );
+    vmathSoaV3Neg( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3AbsPerElem( &result->col0, &mat->col0 );
+    vmathSoaV3AbsPerElem( &result->col1, &mat->col1 );
+    vmathSoaV3AbsPerElem( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar )
+{
+    vmathSoaV3ScalarMul( &result->col0, &mat->col0, scalar );
+    vmathSoaV3ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathSoaV3ScalarMul( &result->col2, &mat->col2, scalar );
+}
+
+static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec )
+{
+    vec_float4 tmpX, tmpY, tmpZ;
+    tmpX = spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) );
+    tmpY = spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) );
+    tmpZ = spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) );
+    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    VmathSoaMatrix3 tmpResult;
+    vmathSoaM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
+    vmathSoaM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
+    vmathSoaM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
+    vmathSoaM3Copy( result, &tmpResult );
+}
+
+static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    vmathSoaV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathSoaV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result )
+{
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), c, s );
+    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( s ), c );
+}
+
+static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    vmathSoaV3MakeFromElems( &result->col0, c, spu_splats(0.0f), negatef4( s ) );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeFromElems( &result->col2, s, spu_splats(0.0f), c );
+}
+
+static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    vmathSoaV3MakeFromElems( &result->col0, c, s, spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, spu_splats(0.0f) );
+    vmathSoaV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ )
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ->x, &sX, &cX );
+    sincosf4( radiansXYZ->y, &sY, &cY );
+    sincosf4( radiansXYZ->z, &sZ, &cZ );
+    tmp0 = spu_mul( cZ, sY );
+    tmp1 = spu_mul( sZ, sY );
+    vmathSoaV3MakeFromElems( &result->col0, spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) );
+}
+
+static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c );
+    x = unitVec->x;
+    y = unitVec->y;
+    z = unitVec->z;
+    xy = spu_mul( x, y );
+    yz = spu_mul( y, z );
+    zx = spu_mul( z, x );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    vmathSoaV3MakeFromElems( &result->col0, spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ) );
+}
+
+static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
+{
+    vmathSoaM3MakeFromQ( result, unitQuat );
+}
+
+static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec )
+{
+    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z );
+}
+
+static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec )
+{
+    vmathSoaV3ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
+}
+
+static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat )
+{
+    vmathSoaV3MulPerElem( &result->col0, &mat->col0, scaleVec );
+    vmathSoaV3MulPerElem( &result->col1, &mat->col1, scaleVec );
+    vmathSoaV3MulPerElem( &result->col2, &mat->col2, scaleVec );
+}
+
+static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 )
+{
+    vmathSoaV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
+    vmathSoaV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
+    vmathSoaV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat )
+{
+    VmathMatrix3 mat0, mat1, mat2, mat3;
+    vmathSoaM3Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
+    printf("slot 0:\n");
+    vmathM3Print( &mat0 );
+    printf("slot 1:\n");
+    vmathM3Print( &mat1 );
+    printf("slot 2:\n");
+    vmathM3Print( &mat2 );
+    printf("slot 3:\n");
+    vmathM3Print( &mat3 );
+}
+
+static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name )
+{
+    printf("%s:\n", name);
+    vmathSoaM3Print( mat );
+}
+
+#endif
+
+static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    vmathSoaV4Copy( &result->col0, &mat->col0 );
+    vmathSoaV4Copy( &result->col1, &mat->col1 );
+    vmathSoaV4Copy( &result->col2, &mat->col2 );
+    vmathSoaV4Copy( &result->col3, &mat->col3 );
+}
+
+static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar )
+{
+    vmathSoaV4MakeFromScalar( &result->col0, scalar );
+    vmathSoaV4MakeFromScalar( &result->col1, scalar );
+    vmathSoaV4MakeFromScalar( &result->col2, scalar );
+    vmathSoaV4MakeFromScalar( &result->col3, scalar );
+}
+
+static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat )
+{
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, &mat->col3, spu_splats(1.0f) );
+}
+
+static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0, const VmathSoaVector4 *_col1, const VmathSoaVector4 *_col2, const VmathSoaVector4 *_col3 )
+{
+    vmathSoaV4Copy( &result->col0, _col0 );
+    vmathSoaV4Copy( &result->col1, _col1 );
+    vmathSoaV4Copy( &result->col2, _col2 );
+    vmathSoaV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec )
+{
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
+}
+
+static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
+{
+    VmathSoaMatrix3 mat;
+    vmathSoaM3MakeFromQ( &mat, unitQuat );
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat.col0, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat.col1, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat.col2, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
+}
+
+static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat )
+{
+    vmathSoaV4MakeFromAos( &result->col0, &mat->col0 );
+    vmathSoaV4MakeFromAos( &result->col1, &mat->col1 );
+    vmathSoaV4MakeFromAos( &result->col2, &mat->col2 );
+    vmathSoaV4MakeFromAos( &result->col3, &mat->col3 );
+}
+
+static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 )
+{
+    vmathSoaV4MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
+    vmathSoaV4MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
+    vmathSoaV4MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
+    vmathSoaV4MakeFrom4Aos( &result->col3, &mat0->col3, &mat1->col3, &mat2->col3, &mat3->col3 );
+}
+
+static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
+{
+    vmathSoaV4Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV4Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV4Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+    vmathSoaV4Get4Aos( &mat->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
+}
+
+static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0 )
+{
+    vmathSoaV4Copy( &result->col0, _col0 );
+}
+
+static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col1 )
+{
+    vmathSoaV4Copy( &result->col1, _col1 );
+}
+
+static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col2 )
+{
+    vmathSoaV4Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col3 )
+{
+    vmathSoaV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec )
+{
+    vmathSoaV4Copy( (&result->col0 + col), vec );
+}
+
+static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec )
+{
+    vmathSoaV4SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
+    vmathSoaV4SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
+    vmathSoaV4SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
+    vmathSoaV4SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
+}
+
+static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
+{
+    VmathSoaVector4 tmpV3_0;
+    vmathSoaM4GetCol( &tmpV3_0, result, col );
+    vmathSoaV4SetElem( &tmpV3_0, row, val );
+    vmathSoaM4SetCol( result, col, &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row )
+{
+    VmathSoaVector4 tmpV4_0;
+    vmathSoaM4GetCol( &tmpV4_0, mat, col );
+    return vmathSoaV4GetElem( &tmpV4_0, row );
+}
+
+static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{
+    vmathSoaV4Copy( result, &mat->col0 );
+}
+
+static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{
+    vmathSoaV4Copy( result, &mat->col1 );
+}
+
+static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{
+    vmathSoaV4Copy( result, &mat->col2 );
+}
+
+static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{
+    vmathSoaV4Copy( result, &mat->col3 );
+}
+
+static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col )
+{
+    vmathSoaV4Copy( result, (&mat->col0 + col) );
+}
+
+static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row )
+{
+    vmathSoaV4MakeFromElems( result, vmathSoaV4GetElem( &mat->col0, row ), vmathSoaV4GetElem( &mat->col1, row ), vmathSoaV4GetElem( &mat->col2, row ), vmathSoaV4GetElem( &mat->col3, row ) );
+}
+
+static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaMatrix4 tmpResult;
+    vmathSoaV4MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
+    vmathSoaV4MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
+    vmathSoaV4MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
+    vmathSoaV4MakeFromElems( &tmpResult.col3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
+    vmathSoaM4Copy( result, &tmpResult );
+}
+
+/*
+ * Computes the inverse of a general 4x4 matrix.
+ *
+ * result - receives the inverse. It is written only at the very end, after
+ *          every element of `mat` has been loaded into locals.
+ * mat    - matrix to invert.
+ *
+ * The four result columns are accumulated in res0..res3 from products of
+ * matrix elements with 2x2 terms (tmp0..tmp5), then each is multiplied by
+ * detInv, which is recipf4() of the determinant expression. No check for a
+ * zero determinant is made in this function.
+ */
+static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaVector4 res0, res1, res2, res3;
+    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    /* Load all 16 elements: mA..mD = col0.xyzw, mE..mH = col1, mI..mL = col2, mM..mP = col3. */
+    mA = mat->col0.x;
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x;
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x;
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x;
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w;
+    /* First set of 2x2 terms; these use only y, z and w components. */
+    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
+    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
+    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
+    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
+    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
+    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
+    /* res0 becomes result column 0 (before scaling by detInv). */
+    vmathSoaV4SetX( &res0, spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) ) );
+    vmathSoaV4SetY( &res0, spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) ) );
+    vmathSoaV4SetZ( &res0, spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) ) );
+    vmathSoaV4SetW( &res0, spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) ) );
+    /* Determinant = (mA, mE, mI, mM) dotted with res0; detInv is recipf4 of it. */
+    detInv = recipf4( spu_add( spu_add( spu_add( spu_mul( mA, res0.x ), spu_mul( mE, res0.y ) ), spu_mul( mI, res0.z ) ), spu_mul( mM, res0.w ) ) );
+    /* Seed res1..res3 with partial products that still need the first set of tmp values; they are completed below. */
+    vmathSoaV4SetX( &res1, spu_mul( mI, tmp1 ) );
+    vmathSoaV4SetY( &res1, spu_mul( mM, tmp0 ) );
+    vmathSoaV4SetZ( &res1, spu_mul( mA, tmp1 ) );
+    vmathSoaV4SetW( &res1, spu_mul( mE, tmp0 ) );
+    vmathSoaV4SetX( &res3, spu_mul( mI, tmp3 ) );
+    vmathSoaV4SetY( &res3, spu_mul( mM, tmp2 ) );
+    vmathSoaV4SetZ( &res3, spu_mul( mA, tmp3 ) );
+    vmathSoaV4SetW( &res3, spu_mul( mE, tmp2 ) );
+    vmathSoaV4SetX( &res2, spu_mul( mI, tmp5 ) );
+    vmathSoaV4SetY( &res2, spu_mul( mM, tmp4 ) );
+    vmathSoaV4SetZ( &res2, spu_mul( mA, tmp5 ) );
+    vmathSoaV4SetW( &res2, spu_mul( mE, tmp4 ) );
+    /* Second set of 2x2 terms (tmp0..tmp5 are reused); each involves x components. */
+    tmp0 = spu_sub( spu_mul( mI, mB ), spu_mul( mA, mJ ) );
+    tmp1 = spu_sub( spu_mul( mM, mF ), spu_mul( mE, mN ) );
+    tmp2 = spu_sub( spu_mul( mI, mD ), spu_mul( mA, mL ) );
+    tmp3 = spu_sub( spu_mul( mM, mH ), spu_mul( mE, mP ) );
+    tmp4 = spu_sub( spu_mul( mI, mC ), spu_mul( mA, mK ) );
+    tmp5 = spu_sub( spu_mul( mM, mG ), spu_mul( mE, mO ) );
+    /* Finish res2, res3 and res1 by combining the new terms with the seeded partial products. */
+    vmathSoaV4SetX( &res2, spu_add( spu_sub( spu_mul( mL, tmp1 ), spu_mul( mJ, tmp3 ) ), res2.x ) );
+    vmathSoaV4SetY( &res2, spu_add( spu_sub( spu_mul( mP, tmp0 ), spu_mul( mN, tmp2 ) ), res2.y ) );
+    vmathSoaV4SetZ( &res2, spu_sub( spu_sub( spu_mul( mB, tmp3 ), spu_mul( mD, tmp1 ) ), res2.z ) );
+    vmathSoaV4SetW( &res2, spu_sub( spu_sub( spu_mul( mF, tmp2 ), spu_mul( mH, tmp0 ) ), res2.w ) );
+    vmathSoaV4SetX( &res3, spu_add( spu_sub( spu_mul( mJ, tmp5 ), spu_mul( mK, tmp1 ) ), res3.x ) );
+    vmathSoaV4SetY( &res3, spu_add( spu_sub( spu_mul( mN, tmp4 ), spu_mul( mO, tmp0 ) ), res3.y ) );
+    vmathSoaV4SetZ( &res3, spu_sub( spu_sub( spu_mul( mC, tmp1 ), spu_mul( mB, tmp5 ) ), res3.z ) );
+    vmathSoaV4SetW( &res3, spu_sub( spu_sub( spu_mul( mG, tmp0 ), spu_mul( mF, tmp4 ) ), res3.w ) );
+    vmathSoaV4SetX( &res1, spu_sub( spu_sub( spu_mul( mK, tmp3 ), spu_mul( mL, tmp5 ) ), res1.x ) );
+    vmathSoaV4SetY( &res1, spu_sub( spu_sub( spu_mul( mO, tmp2 ), spu_mul( mP, tmp4 ) ), res1.y ) );
+    vmathSoaV4SetZ( &res1, spu_add( spu_sub( spu_mul( mD, tmp5 ), spu_mul( mC, tmp3 ) ), res1.z ) );
+    vmathSoaV4SetW( &res1, spu_add( spu_sub( spu_mul( mH, tmp4 ), spu_mul( mG, tmp2 ) ), res1.w ) );
+    /* Scale every column by detInv and store into the output matrix. */
+    vmathSoaV4ScalarMul( &result->col0, &res0, detInv );
+    vmathSoaV4ScalarMul( &result->col1, &res1, detInv );
+    vmathSoaV4ScalarMul( &result->col2, &res2, detInv );
+    vmathSoaV4ScalarMul( &result->col3, &res3, detInv );
+}
+
+/*
+ * Inverts `mat` by treating it as a 3x4 transform.
+ * Only the xyz part of each column is read (the w components of `mat` are
+ * not used). The extracted transform is inverted with vmathSoaT3Inverse and
+ * converted back to a 4x4 matrix with vmathSoaM4MakeFromT3.
+ */
+static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaTransform3 source, inverted;
+    VmathSoaVector3 xyz;
+    /* One scratch vector is reused to move each column across. */
+    vmathSoaV4GetXYZ( &xyz, &mat->col0 );
+    vmathSoaT3SetCol0( &source, &xyz );
+    vmathSoaV4GetXYZ( &xyz, &mat->col1 );
+    vmathSoaT3SetCol1( &source, &xyz );
+    vmathSoaV4GetXYZ( &xyz, &mat->col2 );
+    vmathSoaT3SetCol2( &source, &xyz );
+    vmathSoaV4GetXYZ( &xyz, &mat->col3 );
+    vmathSoaT3SetCol3( &source, &xyz );
+    vmathSoaT3Inverse( &inverted, &source );
+    vmathSoaM4MakeFromT3( result, &inverted );
+}
+
+/*
+ * Inverts `mat` by treating it as a 3x4 transform and applying
+ * vmathSoaT3OrthoInverse, which transposes the 3x3 part instead of doing a
+ * general inversion. Only the xyz part of each column of `mat` is read.
+ * NOTE(review): presumably valid only when the upper 3x3 is orthonormal.
+ */
+static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaTransform3 source, inverted;
+    VmathSoaVector3 xyz;
+    /* One scratch vector is reused to move each column across. */
+    vmathSoaV4GetXYZ( &xyz, &mat->col0 );
+    vmathSoaT3SetCol0( &source, &xyz );
+    vmathSoaV4GetXYZ( &xyz, &mat->col1 );
+    vmathSoaT3SetCol1( &source, &xyz );
+    vmathSoaV4GetXYZ( &xyz, &mat->col2 );
+    vmathSoaT3SetCol2( &source, &xyz );
+    vmathSoaV4GetXYZ( &xyz, &mat->col3 );
+    vmathSoaT3SetCol3( &source, &xyz );
+    vmathSoaT3OrthoInverse( &inverted, &source );
+    vmathSoaM4MakeFromT3( result, &inverted );
+}
+
+/*
+ * Returns the determinant of `mat`.
+ *
+ * The value is mA*dx + mE*dy + mI*dz + mM*dw, where (mA, mE, mI, mM) are the
+ * x components of the four columns and dx..dw are built from the remaining
+ * (y, z, w) components through the 2x2 terms tmp0..tmp5. This is the same
+ * expansion vmathSoaM4Inverse uses for its determinant.
+ */
+static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat )
+{
+    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    /* Load all 16 elements: mA..mD = col0.xyzw, mE..mH = col1, mI..mL = col2, mM..mP = col3. */
+    mA = mat->col0.x;
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x;
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x;
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x;
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w;
+    /* 2x2 terms built from y, z and w components only. */
+    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
+    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
+    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
+    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
+    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
+    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
+    /* dx..dw are the factors that multiply mA, mE, mI and mM respectively. */
+    dx = spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) );
+    dy = spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) );
+    dz = spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) );
+    dw = spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) );
+    return spu_add( spu_add( spu_add( spu_mul( mA, dx ), spu_mul( mE, dy ) ), spu_mul( mI, dz ) ), spu_mul( mM, dw ) );
+}
+
+/*
+ * Column-wise sum: each column of `result` receives vmathSoaV4Add of the
+ * matching columns of `mat0` and `mat1`, in order col0..col3.
+ */
+static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4Add( out0, &mat0->col0, &mat1->col0 );
+    vmathSoaV4Add( out1, &mat0->col1, &mat1->col1 );
+    vmathSoaV4Add( out2, &mat0->col2, &mat1->col2 );
+    vmathSoaV4Add( out3, &mat0->col3, &mat1->col3 );
+}
+
+/*
+ * Column-wise difference: each column of `result` receives vmathSoaV4Sub of
+ * the matching columns of `mat0` and `mat1` (mat0 first), in order col0..col3.
+ */
+static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4Sub( out0, &mat0->col0, &mat1->col0 );
+    vmathSoaV4Sub( out1, &mat0->col1, &mat1->col1 );
+    vmathSoaV4Sub( out2, &mat0->col2, &mat1->col2 );
+    vmathSoaV4Sub( out3, &mat0->col3, &mat1->col3 );
+}
+
+/* Applies vmathSoaV4Neg to every column of `mat`, storing into `result`. */
+static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4Neg( out0, &mat->col0 );
+    vmathSoaV4Neg( out1, &mat->col1 );
+    vmathSoaV4Neg( out2, &mat->col2 );
+    vmathSoaV4Neg( out3, &mat->col3 );
+}
+
+/* Applies vmathSoaV4AbsPerElem to every column of `mat`, storing into `result`. */
+static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4AbsPerElem( out0, &mat->col0 );
+    vmathSoaV4AbsPerElem( out1, &mat->col1 );
+    vmathSoaV4AbsPerElem( out2, &mat->col2 );
+    vmathSoaV4AbsPerElem( out3, &mat->col3 );
+}
+
+/*
+ * Multiplies every column of `mat` by `scalar` using vmathSoaV4ScalarMul and
+ * stores the scaled columns in `result`.
+ */
+static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4ScalarMul( out0, &mat->col0, scalar );
+    vmathSoaV4ScalarMul( out1, &mat->col1, scalar );
+    vmathSoaV4ScalarMul( out2, &mat->col2, scalar );
+    vmathSoaV4ScalarMul( out3, &mat->col3, scalar );
+}
+
+/*
+ * Matrix * 4-vector product: result = col0*vec.x + col1*vec.y + col2*vec.z + col3*vec.w.
+ * Every output component is accumulated in a local and `result` is written
+ * only at the end, so `result` may be the same object as `vec`.
+ */
+static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec )
+{
+    vec_float4 vx, vy, vz, vw, outX, outY, outZ, outW;
+    vx = vec->x;
+    vy = vec->y;
+    vz = vec->z;
+    vw = vec->w;
+    /* Terms are summed left to right, column 0 first. */
+    outX = spu_mul( mat->col0.x, vx );
+    outX = spu_add( outX, spu_mul( mat->col1.x, vy ) );
+    outX = spu_add( outX, spu_mul( mat->col2.x, vz ) );
+    outX = spu_add( outX, spu_mul( mat->col3.x, vw ) );
+    outY = spu_mul( mat->col0.y, vx );
+    outY = spu_add( outY, spu_mul( mat->col1.y, vy ) );
+    outY = spu_add( outY, spu_mul( mat->col2.y, vz ) );
+    outY = spu_add( outY, spu_mul( mat->col3.y, vw ) );
+    outZ = spu_mul( mat->col0.z, vx );
+    outZ = spu_add( outZ, spu_mul( mat->col1.z, vy ) );
+    outZ = spu_add( outZ, spu_mul( mat->col2.z, vz ) );
+    outZ = spu_add( outZ, spu_mul( mat->col3.z, vw ) );
+    outW = spu_mul( mat->col0.w, vx );
+    outW = spu_add( outW, spu_mul( mat->col1.w, vy ) );
+    outW = spu_add( outW, spu_mul( mat->col2.w, vz ) );
+    outW = spu_add( outW, spu_mul( mat->col3.w, vw ) );
+    vmathSoaV4MakeFromElems( result, outX, outY, outZ, outW );
+}
+
+/*
+ * Matrix * 3-vector product producing a 4-vector:
+ * result = col0*vec.x + col1*vec.y + col2*vec.z.
+ * Column 3 of `mat` does not contribute, i.e. `vec` is treated as having w = 0.
+ * Components of `result` are stored one at a time in x, y, z, w order.
+ */
+static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec )
+{
+    vec_float4 acc;
+    acc = spu_mul( mat->col0.x, vec->x );
+    acc = spu_add( acc, spu_mul( mat->col1.x, vec->y ) );
+    acc = spu_add( acc, spu_mul( mat->col2.x, vec->z ) );
+    result->x = acc;
+    acc = spu_mul( mat->col0.y, vec->x );
+    acc = spu_add( acc, spu_mul( mat->col1.y, vec->y ) );
+    acc = spu_add( acc, spu_mul( mat->col2.y, vec->z ) );
+    result->y = acc;
+    acc = spu_mul( mat->col0.z, vec->x );
+    acc = spu_add( acc, spu_mul( mat->col1.z, vec->y ) );
+    acc = spu_add( acc, spu_mul( mat->col2.z, vec->z ) );
+    result->z = acc;
+    acc = spu_mul( mat->col0.w, vec->x );
+    acc = spu_add( acc, spu_mul( mat->col1.w, vec->y ) );
+    acc = spu_add( acc, spu_mul( mat->col2.w, vec->z ) );
+    result->w = acc;
+}
+
+/*
+ * Matrix * 3-D point product producing a 4-vector:
+ * result = col0*pnt.x + col1*pnt.y + col2*pnt.z + col3.
+ * Column 3 is added unscaled, i.e. `pnt` is treated as having w = 1.
+ * Components of `result` are stored one at a time in x, y, z, w order.
+ */
+static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt )
+{
+    vec_float4 acc;
+    acc = spu_mul( mat->col0.x, pnt->x );
+    acc = spu_add( acc, spu_mul( mat->col1.x, pnt->y ) );
+    acc = spu_add( acc, spu_mul( mat->col2.x, pnt->z ) );
+    result->x = spu_add( acc, mat->col3.x );
+    acc = spu_mul( mat->col0.y, pnt->x );
+    acc = spu_add( acc, spu_mul( mat->col1.y, pnt->y ) );
+    acc = spu_add( acc, spu_mul( mat->col2.y, pnt->z ) );
+    result->y = spu_add( acc, mat->col3.y );
+    acc = spu_mul( mat->col0.z, pnt->x );
+    acc = spu_add( acc, spu_mul( mat->col1.z, pnt->y ) );
+    acc = spu_add( acc, spu_mul( mat->col2.z, pnt->z ) );
+    result->z = spu_add( acc, mat->col3.z );
+    acc = spu_mul( mat->col0.w, pnt->x );
+    acc = spu_add( acc, spu_mul( mat->col1.w, pnt->y ) );
+    acc = spu_add( acc, spu_mul( mat->col2.w, pnt->z ) );
+    result->w = spu_add( acc, mat->col3.w );
+}
+
+/*
+ * Matrix product mat0 * mat1: each column of the product is mat0 applied to
+ * the matching column of `mat1` (vmathSoaM4MulV4).
+ * The product is built in a local matrix and copied out last, so `result`
+ * may be the same object as `mat0` or `mat1`.
+ */
+static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    VmathSoaMatrix4 product;
+    const VmathSoaVector4 *rhs0 = &mat1->col0;
+    const VmathSoaVector4 *rhs1 = &mat1->col1;
+    const VmathSoaVector4 *rhs2 = &mat1->col2;
+    const VmathSoaVector4 *rhs3 = &mat1->col3;
+    vmathSoaM4MulV4( &product.col0, mat0, rhs0 );
+    vmathSoaM4MulV4( &product.col1, mat0, rhs1 );
+    vmathSoaM4MulV4( &product.col2, mat0, rhs2 );
+    vmathSoaM4MulV4( &product.col3, mat0, rhs3 );
+    vmathSoaM4Copy( result, &product );
+}
+
+/*
+ * Product of a 4x4 matrix and a 3x4 transform.
+ * Columns 0..2 of `tfrm1` go through vmathSoaM4MulV3 (no contribution from
+ * mat's column 3); column 3 of `tfrm1` is converted to a point and goes
+ * through vmathSoaM4MulP3 (mat's column 3 is added).
+ * The product is built in a local matrix and copied out last.
+ */
+static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm1 )
+{
+    VmathSoaMatrix4 product;
+    VmathSoaPoint3 translationPnt;
+    vmathSoaM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathSoaM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathSoaM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathSoaP3MakeFromV3( &translationPnt, &tfrm1->col3 );
+    vmathSoaM4MulP3( &product.col3, mat, &translationPnt );
+    vmathSoaM4Copy( result, &product );
+}
+
+/*
+ * Element-wise product: each column of `result` receives
+ * vmathSoaV4MulPerElem of the matching columns of `mat0` and `mat1`.
+ */
+static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4MulPerElem( out0, &mat0->col0, &mat1->col0 );
+    vmathSoaV4MulPerElem( out1, &mat0->col1, &mat1->col1 );
+    vmathSoaV4MulPerElem( out2, &mat0->col2, &mat1->col2 );
+    vmathSoaV4MulPerElem( out3, &mat0->col3, &mat1->col3 );
+}
+
+/*
+ * Fills `result` with the X, Y, Z and W axis vectors as columns 0..3
+ * (the identity matrix, given the axis helpers produce unit axes).
+ */
+static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result )
+{
+    VmathSoaVector4 *xCol = &result->col0;
+    VmathSoaVector4 *yCol = &result->col1;
+    VmathSoaVector4 *zCol = &result->col2;
+    VmathSoaVector4 *wCol = &result->col3;
+    vmathSoaV4MakeXAxis( xCol );
+    vmathSoaV4MakeYAxis( yCol );
+    vmathSoaV4MakeZAxis( zCol );
+    vmathSoaV4MakeWAxis( wCol );
+}
+
+/*
+ * Sets the xyz part of columns 0..2 of `result` from the columns of `mat3`
+ * using vmathSoaV4SetXYZ. Column 3 of `result` is not touched here.
+ */
+static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 )
+{
+    VmathSoaVector4 *dst0 = &result->col0;
+    VmathSoaVector4 *dst1 = &result->col1;
+    VmathSoaVector4 *dst2 = &result->col2;
+    vmathSoaV4SetXYZ( dst0, &mat3->col0 );
+    vmathSoaV4SetXYZ( dst1, &mat3->col1 );
+    vmathSoaV4SetXYZ( dst2, &mat3->col2 );
+}
+
+/*
+ * Extracts the xyz part of columns 0..2 of `mat` into the three columns of
+ * `result` using vmathSoaV4GetXYZ.
+ */
+static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *src0 = &mat->col0;
+    const VmathSoaVector4 *src1 = &mat->col1;
+    const VmathSoaVector4 *src2 = &mat->col2;
+    vmathSoaV4GetXYZ( &result->col0, src0 );
+    vmathSoaV4GetXYZ( &result->col1, src1 );
+    vmathSoaV4GetXYZ( &result->col2, src2 );
+}
+
+/*
+ * Sets the xyz part of column 3 of `result` from `translateVec`
+ * (vmathSoaV4SetXYZ). Columns 0..2 are not touched here.
+ */
+static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
+{
+    VmathSoaVector4 *translationCol = &result->col3;
+    vmathSoaV4SetXYZ( translationCol, translateVec );
+}
+
+/* Extracts the xyz part of column 3 of `mat` into `result` (vmathSoaV4GetXYZ). */
+static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *translationCol = &mat->col3;
+    vmathSoaV4GetXYZ( result, translationCol );
+}
+
+/*
+ * Builds a rotation about the X axis.
+ * With (s, c) obtained from sincosf4( radians ), the columns are:
+ *   col0 = X axis, col1 = (0, c, s, 0), col2 = (0, -s, c, 0), col3 = W axis.
+ */
+static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero;
+    sincosf4( radians, &sinA, &cosA );
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeXAxis( &result->col0 );
+    vmathSoaV4MakeFromElems( &result->col1, zero, cosA, sinA, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, negatef4( sinA ), cosA, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Builds a rotation about the Y axis.
+ * With (s, c) obtained from sincosf4( radians ), the columns are:
+ *   col0 = (c, 0, -s, 0), col1 = Y axis, col2 = (s, 0, c, 0), col3 = W axis.
+ */
+static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero;
+    sincosf4( radians, &sinA, &cosA );
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeFromElems( &result->col0, cosA, zero, negatef4( sinA ), zero );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeFromElems( &result->col2, sinA, zero, cosA, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Builds a rotation about the Z axis.
+ * With (s, c) obtained from sincosf4( radians ), the columns are:
+ *   col0 = (c, s, 0, 0), col1 = (-s, c, 0, 0), col2 = Z axis, col3 = W axis.
+ */
+static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero;
+    sincosf4( radians, &sinA, &cosA );
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeFromElems( &result->col0, cosA, sinA, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, negatef4( sinA ), cosA, zero, zero );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Builds a rotation from three angles held in radiansXYZ (x, y, z).
+ * The columns written match the product Rz * Ry * Rx, e.g.
+ * col0 = (cZ*cY, sZ*cY, -sY, 0). Column 3 is set to the W axis.
+ */
+static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ )
+{
+    vec_float4 sinX, cosX, sinY, cosY, sinZ, cosZ, cZsY, sZsY, zero;
+    sincosf4( radiansXYZ->x, &sinX, &cosX );
+    sincosf4( radiansXYZ->y, &sinY, &cosY );
+    sincosf4( radiansXYZ->z, &sinZ, &cosZ );
+    /* Products shared by columns 1 and 2. */
+    cZsY = spu_mul( cosZ, sinY );
+    sZsY = spu_mul( sinZ, sinY );
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeFromElems( &result->col0, spu_mul( cosZ, cosY ), spu_mul( sinZ, cosY ), negatef4( sinY ), zero );
+    vmathSoaV4MakeFromElems( &result->col1,
+        spu_sub( spu_mul( cZsY, sinX ), spu_mul( sinZ, cosX ) ),
+        spu_add( spu_mul( sZsY, sinX ), spu_mul( cosZ, cosX ) ),
+        spu_mul( cosY, sinX ),
+        zero );
+    vmathSoaV4MakeFromElems( &result->col2,
+        spu_add( spu_mul( cZsY, cosX ), spu_mul( sinZ, sinX ) ),
+        spu_sub( spu_mul( sZsY, cosX ), spu_mul( cosZ, sinX ) ),
+        spu_mul( cosY, cosX ),
+        zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Builds a rotation of `radians` about the axis `unitVec`.
+ * With t = 1 - cos, the diagonal entries are a*a*t + cos and the
+ * off-diagonal entries are a*b*t +/- c*sin, where a, b, c are axis
+ * components. Column 3 is set to the W axis.
+ * `unitVec` is used as given; it is not normalized here.
+ */
+static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    vec_float4 ax, ay, az, sinA, cosA, t, xyT, yzT, zxT, xS, yS, zS, zero;
+    sincosf4( radians, &sinA, &cosA );
+    ax = unitVec->x;
+    ay = unitVec->y;
+    az = unitVec->z;
+    t = spu_sub( spu_splats(1.0f), cosA );
+    /* Each shared product is computed once and used in two columns. */
+    xyT = spu_mul( spu_mul( ax, ay ), t );
+    yzT = spu_mul( spu_mul( ay, az ), t );
+    zxT = spu_mul( spu_mul( az, ax ), t );
+    xS = spu_mul( ax, sinA );
+    yS = spu_mul( ay, sinA );
+    zS = spu_mul( az, sinA );
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeFromElems( &result->col0,
+        spu_add( spu_mul( spu_mul( ax, ax ), t ), cosA ),
+        spu_add( xyT, zS ),
+        spu_sub( zxT, yS ),
+        zero );
+    vmathSoaV4MakeFromElems( &result->col1,
+        spu_sub( xyT, zS ),
+        spu_add( spu_mul( spu_mul( ay, ay ), t ), cosA ),
+        spu_add( yzT, xS ),
+        zero );
+    vmathSoaV4MakeFromElems( &result->col2,
+        spu_add( zxT, yS ),
+        spu_sub( yzT, xS ),
+        spu_add( spu_mul( spu_mul( az, az ), t ), cosA ),
+        zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Builds a 4x4 matrix from `unitQuat` by first making a 3x4 transform with
+ * vmathSoaT3MakeRotationQ and then converting it with vmathSoaM4MakeFromT3.
+ */
+static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat )
+{
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationQ( &rotation, unitQuat );
+    vmathSoaM4MakeFromT3( result, &rotation );
+}
+
+/*
+ * Builds a scale matrix: scaleVec x, y, z are placed on the diagonal of
+ * columns 0..2, every other element of those columns is zero, and column 3
+ * is set to the W axis.
+ */
+static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec )
+{
+    vec_float4 zero;
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeFromElems( &result->col0, scaleVec->x, zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, scaleVec->y, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, scaleVec->z, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Scales the columns of `mat`: column 0 by scaleVec x, column 1 by y and
+ * column 2 by z; column 3 is copied unchanged.
+ * This equals mat multiplied on the right by a scale matrix.
+ */
+static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec )
+{
+    vec_float4 factor;
+    factor = vmathSoaV3GetX( scaleVec );
+    vmathSoaV4ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathSoaV3GetY( scaleVec );
+    vmathSoaV4ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathSoaV3GetZ( scaleVec );
+    vmathSoaV4ScalarMul( &result->col2, &mat->col2, factor );
+    vmathSoaV4Copy( &result->col3, &mat->col3 );
+}
+
+/*
+ * Scales the rows of `mat`: every column is multiplied element-wise by
+ * (scaleVec.x, scaleVec.y, scaleVec.z, 1), so the w row is left unscaled.
+ * This equals mat multiplied on the left by a scale matrix.
+ */
+static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaVector4 rowScale;
+    vec_float4 one;
+    one = spu_splats(1.0f);
+    vmathSoaV4MakeFromV3Scalar( &rowScale, scaleVec, one );
+    vmathSoaV4MulPerElem( &result->col0, &mat->col0, &rowScale );
+    vmathSoaV4MulPerElem( &result->col1, &mat->col1, &rowScale );
+    vmathSoaV4MulPerElem( &result->col2, &mat->col2, &rowScale );
+    vmathSoaV4MulPerElem( &result->col3, &mat->col3, &rowScale );
+}
+
+/*
+ * Builds a translation matrix: columns 0..2 are the X, Y and Z axes and
+ * column 3 is built from `translateVec` and the scalar 1
+ * (vmathSoaV4MakeFromV3Scalar).
+ */
+static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
+{
+    vec_float4 one;
+    vmathSoaV4MakeXAxis( &result->col0 );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    one = spu_splats(1.0f);
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, one );
+}
+
+/*
+ * Builds a viewing matrix from an eye position, a target position and an up
+ * vector.
+ *
+ * An eye frame is assembled whose Z axis is normalize( eyePos - lookAtPos ),
+ * whose X axis is normalize( up x Z ), whose Y axis is Z x X, and whose last
+ * column comes from eyePos. `result` is vmathSoaM4OrthoInverse of that frame.
+ * No handling is done here for an up vector parallel to the view direction.
+ */
+static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec )
+{
+    VmathSoaMatrix4 eyeFrame;
+    VmathSoaVector3 axisX, axisY, axisZ, eyeMinusTarget, sideRaw;
+    VmathSoaVector4 colX, colY, colZ, colEye;
+    /* axisY first holds the normalized up vector; it is replaced below. */
+    vmathSoaV3Normalize( &axisY, upVec );
+    vmathSoaP3Sub( &eyeMinusTarget, eyePos, lookAtPos );
+    vmathSoaV3Normalize( &axisZ, &eyeMinusTarget );
+    vmathSoaV3Cross( &sideRaw, &axisY, &axisZ );
+    vmathSoaV3Normalize( &axisX, &sideRaw );
+    /* Recompute Y from Z and X. */
+    vmathSoaV3Cross( &axisY, &axisZ, &axisX );
+    vmathSoaV4MakeFromV3( &colX, &axisX );
+    vmathSoaV4MakeFromV3( &colY, &axisY );
+    vmathSoaV4MakeFromV3( &colZ, &axisZ );
+    vmathSoaV4MakeFromP3( &colEye, eyePos );
+    vmathSoaM4MakeFromCols( &eyeFrame, &colX, &colY, &colZ, &colEye );
+    vmathSoaM4OrthoInverse( result, &eyeFrame );
+}
+
+/*
+ * Builds a perspective projection matrix.
+ *
+ * With f = tanf4( _VECTORMATH_PI_OVER_2 - 0.5 * fovyRadians ) and
+ * rangeInv = recipf4( zNear - zFar ):
+ *   col0 = (f / aspect, 0, 0, 0)
+ *   col1 = (0, f, 0, 0)
+ *   col2 = (0, 0, (zNear + zFar) * rangeInv, -1)
+ *   col3 = (0, 0, zNear * zFar * rangeInv * 2, 0)
+ * Inputs are not validated (e.g. zNear == zFar or aspect == 0).
+ */
+static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{
+    vec_float4 halfFovy, focal, invDepth, zero;
+    halfFovy = spu_mul( spu_splats(0.5f), fovyRadians );
+    focal = tanf4( spu_sub( spu_splats( _VECTORMATH_PI_OVER_2 ), halfFovy ) );
+    invDepth = recipf4( spu_sub( zNear, zFar ) );
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeFromElems( &result->col0, divf4( focal, aspect ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, focal, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, spu_mul( spu_add( zNear, zFar ), invDepth ), spu_splats(-1.0f) );
+    vmathSoaV4MakeFromElems( &result->col3, zero, zero, spu_mul( spu_mul( spu_mul( zNear, zFar ), invDepth ), spu_splats(2.0f) ), zero );
+}
+
+/*
+ * Builds a perspective projection matrix from frustum planes.
+ *
+ * With n2 = 2 * zNear and inv_ab = recipf4( a - b ):
+ *   col0 = (n2 * inv_rl, 0, 0, 0)
+ *   col1 = (0, n2 * inv_tb, 0, 0)
+ *   col2 = ((right+left) * inv_rl, (top+bottom) * inv_tb, (zNear+zFar) * inv_nf, -1)
+ *   col3 = (0, 0, n2 * inv_nf * zFar, 0)
+ * Inputs are not validated (e.g. right == left).
+ */
+static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    vec_float4 twoNear, invWidth, invHeight, invDepth, zero;
+    invWidth = recipf4( spu_sub( right, left ) );
+    invHeight = recipf4( spu_sub( top, bottom ) );
+    invDepth = recipf4( spu_sub( zNear, zFar ) );
+    twoNear = spu_add( zNear, zNear );
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeFromElems( &result->col0, spu_mul( twoNear, invWidth ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, spu_mul( twoNear, invHeight ), zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2,
+        spu_mul( spu_add( right, left ), invWidth ),
+        spu_mul( spu_add( top, bottom ), invHeight ),
+        spu_mul( spu_add( zNear, zFar ), invDepth ),
+        spu_splats(-1.0f) );
+    vmathSoaV4MakeFromElems( &result->col3, zero, zero, spu_mul( spu_mul( twoNear, invDepth ), zFar ), zero );
+}
+
+/*
+ * Builds an orthographic projection matrix.
+ *
+ * With inv_ab = recipf4( a - b ):
+ *   col0 = (2 * inv_rl, 0, 0, 0)
+ *   col1 = (0, 2 * inv_tb, 0, 0)
+ *   col2 = (0, 0, 2 * inv_nf, 0)
+ *   col3 = (-(right+left) * inv_rl, -(top+bottom) * inv_tb, (zNear+zFar) * inv_nf, 1)
+ * The factor 2 is formed by adding the reciprocal to itself.
+ * Inputs are not validated (e.g. right == left).
+ */
+static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    vec_float4 invWidth, invHeight, invDepth, zero;
+    invWidth = recipf4( spu_sub( right, left ) );
+    invHeight = recipf4( spu_sub( top, bottom ) );
+    invDepth = recipf4( spu_sub( zNear, zFar ) );
+    zero = spu_splats(0.0f);
+    vmathSoaV4MakeFromElems( &result->col0, spu_add( invWidth, invWidth ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, spu_add( invHeight, invHeight ), zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, spu_add( invDepth, invDepth ), zero );
+    vmathSoaV4MakeFromElems( &result->col3,
+        spu_mul( negatef4( spu_add( right, left ) ), invWidth ),
+        spu_mul( negatef4( spu_add( top, bottom ) ), invHeight ),
+        spu_mul( spu_add( zNear, zFar ), invDepth ),
+        spu_splats(1.0f) );
+}
+
+/*
+ * Applies vmathSoaV4Select to each pair of matching columns of `mat0` and
+ * `mat1`, using the same `select1` mask for all four columns.
+ * NOTE(review): presumably a set mask slot picks the `mat1` value - confirm
+ * against vmathSoaV4Select.
+ */
+static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 )
+{
+    VmathSoaVector4 *out0 = &result->col0;
+    VmathSoaVector4 *out1 = &result->col1;
+    VmathSoaVector4 *out2 = &result->col2;
+    VmathSoaVector4 *out3 = &result->col3;
+    vmathSoaV4Select( out0, &mat0->col0, &mat1->col0, select1 );
+    vmathSoaV4Select( out1, &mat0->col1, &mat1->col1, select1 );
+    vmathSoaV4Select( out2, &mat0->col2, &mat1->col2, select1 );
+    vmathSoaV4Select( out3, &mat0->col3, &mat1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: splits the SoA matrix into four AoS matrices with
+ * vmathSoaM4Get4Aos and prints each with vmathM4Print under a
+ * "slot N:" heading (N = 0..3).
+ */
+static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat )
+{
+    VmathMatrix4 slot0, slot1, slot2, slot3;
+    vmathSoaM4Get4Aos( mat, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathM4Print( &slot0 );
+    printf("slot 1:\n");
+    vmathM4Print( &slot1 );
+    printf("slot 2:\n");
+    vmathM4Print( &slot2 );
+    printf("slot 3:\n");
+    vmathM4Print( &slot3 );
+}
+
+/*
+ * Debug helper: prints `name` followed by ":" and a newline, then prints
+ * the matrix with vmathSoaM4Print.
+ */
+static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name )
+{
+    printf("%s:\n", name);
+    vmathSoaM4Print( mat );
+}
+
+#endif
+
+/* Copies all four columns of `tfrm` into `result` (vmathSoaV3Copy per column). */
+static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    VmathSoaVector3 *out0 = &result->col0;
+    VmathSoaVector3 *out1 = &result->col1;
+    VmathSoaVector3 *out2 = &result->col2;
+    VmathSoaVector3 *out3 = &result->col3;
+    vmathSoaV3Copy( out0, &tfrm->col0 );
+    vmathSoaV3Copy( out1, &tfrm->col1 );
+    vmathSoaV3Copy( out2, &tfrm->col2 );
+    vmathSoaV3Copy( out3, &tfrm->col3 );
+}
+
+/*
+ * Initializes every column of `result` from `scalar` using
+ * vmathSoaV3MakeFromScalar.
+ */
+static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar )
+{
+    VmathSoaVector3 *out0 = &result->col0;
+    VmathSoaVector3 *out1 = &result->col1;
+    VmathSoaVector3 *out2 = &result->col2;
+    VmathSoaVector3 *out3 = &result->col3;
+    vmathSoaV3MakeFromScalar( out0, scalar );
+    vmathSoaV3MakeFromScalar( out1, scalar );
+    vmathSoaV3MakeFromScalar( out2, scalar );
+    vmathSoaV3MakeFromScalar( out3, scalar );
+}
+
+/* Builds `result` by copying the four supplied vectors into columns 0..3. */
+static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2, const VmathSoaVector3 *_col3 )
+{
+    VmathSoaVector3 *out0 = &result->col0;
+    VmathSoaVector3 *out1 = &result->col1;
+    VmathSoaVector3 *out2 = &result->col2;
+    VmathSoaVector3 *out3 = &result->col3;
+    vmathSoaV3Copy( out0, _col0 );
+    vmathSoaV3Copy( out1, _col1 );
+    vmathSoaV3Copy( out2, _col2 );
+    vmathSoaV3Copy( out3, _col3 );
+}
+
+/*
+ * Builds `result` from a 3x3 matrix and a translation vector: `tfrm` is
+ * stored with vmathSoaT3SetUpper3x3 and `translateVec` with
+ * vmathSoaT3SetTranslation.
+ */
+static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec )
+{
+    vmathSoaT3SetUpper3x3( result, tfrm );
+    vmathSoaT3SetTranslation( result, translateVec );
+}
+
+/*
+ * Builds `result` from a quaternion and a translation vector: `unitQuat` is
+ * converted to a 3x3 matrix (vmathSoaM3MakeFromQ) that is stored with
+ * vmathSoaT3SetUpper3x3, then `translateVec` is stored with
+ * vmathSoaT3SetTranslation.
+ */
+static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
+{
+    VmathSoaMatrix3 rotation;
+    vmathSoaM3MakeFromQ( &rotation, unitQuat );
+    vmathSoaT3SetUpper3x3( result, &rotation );
+    vmathSoaT3SetTranslation( result, translateVec );
+}
+
+/*
+ * Builds an SoA transform from a single AoS transform, converting each
+ * column with vmathSoaV3MakeFromAos.
+ * NOTE(review): presumably the AoS value is replicated to all slots - confirm
+ * against vmathSoaV3MakeFromAos.
+ */
+static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm )
+{
+    VmathSoaVector3 *out0 = &result->col0;
+    VmathSoaVector3 *out1 = &result->col1;
+    VmathSoaVector3 *out2 = &result->col2;
+    VmathSoaVector3 *out3 = &result->col3;
+    vmathSoaV3MakeFromAos( out0, &tfrm->col0 );
+    vmathSoaV3MakeFromAos( out1, &tfrm->col1 );
+    vmathSoaV3MakeFromAos( out2, &tfrm->col2 );
+    vmathSoaV3MakeFromAos( out3, &tfrm->col3 );
+}
+
+/*
+ * Builds an SoA transform from four AoS transforms: for each column, the
+ * matching columns of tfrm0..tfrm3 are combined with vmathSoaV3MakeFrom4Aos.
+ */
+static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 )
+{
+    VmathSoaVector3 *out0 = &result->col0;
+    VmathSoaVector3 *out1 = &result->col1;
+    VmathSoaVector3 *out2 = &result->col2;
+    VmathSoaVector3 *out3 = &result->col3;
+    vmathSoaV3MakeFrom4Aos( out0, &tfrm0->col0, &tfrm1->col0, &tfrm2->col0, &tfrm3->col0 );
+    vmathSoaV3MakeFrom4Aos( out1, &tfrm0->col1, &tfrm1->col1, &tfrm2->col1, &tfrm3->col1 );
+    vmathSoaV3MakeFrom4Aos( out2, &tfrm0->col2, &tfrm1->col2, &tfrm2->col2, &tfrm3->col2 );
+    vmathSoaV3MakeFrom4Aos( out3, &tfrm0->col3, &tfrm1->col3, &tfrm2->col3, &tfrm3->col3 );
+}
+
+/*
+ * Splits an SoA transform into four AoS transforms: each column of `tfrm`
+ * is distributed to the matching column of result0..result3 with
+ * vmathSoaV3Get4Aos.
+ */
+static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
+{
+    const VmathSoaVector3 *src0 = &tfrm->col0;
+    const VmathSoaVector3 *src1 = &tfrm->col1;
+    const VmathSoaVector3 *src2 = &tfrm->col2;
+    const VmathSoaVector3 *src3 = &tfrm->col3;
+    vmathSoaV3Get4Aos( src0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV3Get4Aos( src1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV3Get4Aos( src2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+    vmathSoaV3Get4Aos( src3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
+}
+
+/* Overwrites column 0 of `result` with `_col0` (via vmathSoaV3Copy). */
+static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0 )
+{
+    VmathSoaVector3 *dstCol = &result->col0;
+    vmathSoaV3Copy( dstCol, _col0 );
+}
+
+/* Overwrites column 1 of `result` with `_col1` (via vmathSoaV3Copy). */
+static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *_col1 )
+{
+    VmathSoaVector3 *dstCol = &result->col1;
+    vmathSoaV3Copy( dstCol, _col1 );
+}
+
+/* Overwrites column 2 of `result` with `_col2` (via vmathSoaV3Copy). */
+static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *_col2 )
+{
+    VmathSoaVector3 *dstCol = &result->col2;
+    vmathSoaV3Copy( dstCol, _col2 );
+}
+
+/* Overwrites column 3 of `result` with `_col3` (via vmathSoaV3Copy). */
+static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *_col3 )
+{
+    VmathSoaVector3 *dstCol = &result->col3;
+    vmathSoaV3Copy( dstCol, _col3 );
+}
+
+/*
+ * Overwrites the column selected by `col` with `vec`.
+ * The column is found by pointer arithmetic starting at col0; `col` is not
+ * range-checked here (presumably callers pass 0..3).
+ */
+static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec )
+{
+    VmathSoaVector3 *firstCol = &result->col0;
+    vmathSoaV3Copy( firstCol + col, vec );
+}
+
+/*
+ * Sets row `row` of `result` from the four elements of `vec`: element k of
+ * `vec` is stored into element `row` of column k (k = 0..3).
+ */
+static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec )
+{
+    vec_float4 elem;
+    elem = vmathSoaV4GetElem( vec, 0 );
+    vmathSoaV3SetElem( &result->col0, row, elem );
+    elem = vmathSoaV4GetElem( vec, 1 );
+    vmathSoaV3SetElem( &result->col1, row, elem );
+    elem = vmathSoaV4GetElem( vec, 2 );
+    vmathSoaV3SetElem( &result->col2, row, elem );
+    elem = vmathSoaV4GetElem( vec, 3 );
+    vmathSoaV3SetElem( &result->col3, row, elem );
+}
+
+/*
+ * Sets the element at (`col`, `row`) to `val`.
+ * Done as read-modify-write: the column is fetched into a local, the
+ * element is changed there, and the column is stored back.
+ */
+static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
+{
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, result, col );
+    vmathSoaV3SetElem( &column, row, val );
+    vmathSoaT3SetCol( result, col, &column );
+}
+
+/*
+ * Returns the element at (`col`, `row`): the column is copied into a local
+ * with vmathSoaT3GetCol and the element is read with vmathSoaV3GetElem.
+ */
+static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row )
+{
+    VmathSoaVector3 column;
+    vec_float4 elem;
+    vmathSoaT3GetCol( &column, tfrm, col );
+    elem = vmathSoaV3GetElem( &column, row );
+    return elem;
+}
+
+/* Copies column 0 of `tfrm` into `result` (via vmathSoaV3Copy). */
+static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *srcCol = &tfrm->col0;
+    vmathSoaV3Copy( result, srcCol );
+}
+
+/* Copies column 1 of `tfrm` into `result` (via vmathSoaV3Copy). */
+static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *srcCol = &tfrm->col1;
+    vmathSoaV3Copy( result, srcCol );
+}
+
+/* Copies column 2 of `tfrm` into `result` (via vmathSoaV3Copy). */
+static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *srcCol = &tfrm->col2;
+    vmathSoaV3Copy( result, srcCol );
+}
+
+/* Copies column 3 of `tfrm` into `result` (via vmathSoaV3Copy). */
+static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *srcCol = &tfrm->col3;
+    vmathSoaV3Copy( result, srcCol );
+}
+
+/*
+ * Copies the column selected by `col` into `result`.
+ * The column is found by pointer arithmetic starting at col0; `col` is not
+ * range-checked here (presumably callers pass 0..3).
+ */
+static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col )
+{
+    const VmathSoaVector3 *firstCol = &tfrm->col0;
+    vmathSoaV3Copy( result, firstCol + col );
+}
+
+/*
+ * Builds the 4-vector `result` from element `row` of each of the four
+ * columns of `tfrm` (column 0 supplies the first element, column 3 the last).
+ */
+static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row )
+{
+    vec_float4 fromCol0, fromCol1, fromCol2, fromCol3;
+    fromCol0 = vmathSoaV3GetElem( &tfrm->col0, row );
+    fromCol1 = vmathSoaV3GetElem( &tfrm->col1, row );
+    fromCol2 = vmathSoaV3GetElem( &tfrm->col2, row );
+    fromCol3 = vmathSoaV3GetElem( &tfrm->col3, row );
+    vmathSoaV4MakeFromElems( result, fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+/*
+ * Computes the inverse of a 3x4 transform.
+ *
+ * The 3x3 part is inverted from the cross products of its columns: the
+ * inverse columns are the transposed cross products scaled by
+ * recipf4( col2 . (col0 x col1) ). The new translation is
+ * -( inv0 * t.x + inv1 * t.y + inv2 * t.z ), where t is column 3 of `tfrm`.
+ * No check for a zero determinant is made here.
+ */
+static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    VmathSoaVector3 cross12, cross20, cross01, invCol0, invCol1, invCol2;
+    VmathSoaVector3 termX, termY, termZ, sumYZ, sumXYZ, negTranslation;
+    vec_float4 invDet;
+    vmathSoaV3Cross( &cross12, &tfrm->col1, &tfrm->col2 );
+    vmathSoaV3Cross( &cross20, &tfrm->col2, &tfrm->col0 );
+    vmathSoaV3Cross( &cross01, &tfrm->col0, &tfrm->col1 );
+    invDet = recipf4( vmathSoaV3Dot( &tfrm->col2, &cross01 ) );
+    /* Inverse column k gathers component k of the three cross products. */
+    vmathSoaV3MakeFromElems( &invCol0, spu_mul( cross12.x, invDet ), spu_mul( cross20.x, invDet ), spu_mul( cross01.x, invDet ) );
+    vmathSoaV3MakeFromElems( &invCol1, spu_mul( cross12.y, invDet ), spu_mul( cross20.y, invDet ), spu_mul( cross01.y, invDet ) );
+    vmathSoaV3MakeFromElems( &invCol2, spu_mul( cross12.z, invDet ), spu_mul( cross20.z, invDet ), spu_mul( cross01.z, invDet ) );
+    vmathSoaV3Copy( &result->col0, &invCol0 );
+    vmathSoaV3Copy( &result->col1, &invCol1 );
+    vmathSoaV3Copy( &result->col2, &invCol2 );
+    /* Translation is read after columns 0..2 of result have been stored. */
+    vmathSoaV3ScalarMul( &termX, &invCol0, tfrm->col3.x );
+    vmathSoaV3ScalarMul( &termY, &invCol1, tfrm->col3.y );
+    vmathSoaV3ScalarMul( &termZ, &invCol2, tfrm->col3.z );
+    vmathSoaV3Add( &sumYZ, &termY, &termZ );
+    vmathSoaV3Add( &sumXYZ, &termX, &sumYZ );
+    vmathSoaV3Neg( &negTranslation, &sumXYZ );
+    vmathSoaV3Copy( &result->col3, &negTranslation );
+}
+
+/*
+ * Computes an inverse of a 3x4 transform by transposing its 3x3 part.
+ *
+ * Columns 0..2 of the result are the rows of the input's 3x3 part, and the
+ * new translation is -( inv0 * t.x + inv1 * t.y + inv2 * t.z ), where t is
+ * column 3 of `tfrm`.
+ * NOTE(review): presumably valid only when the 3x3 part is orthonormal.
+ */
+static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    VmathSoaVector3 invCol0, invCol1, invCol2;
+    VmathSoaVector3 termX, termY, termZ, sumYZ, sumXYZ, negTranslation;
+    /* Inverse column k gathers component k of input columns 0..2. */
+    vmathSoaV3MakeFromElems( &invCol0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
+    vmathSoaV3MakeFromElems( &invCol1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
+    vmathSoaV3MakeFromElems( &invCol2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
+    vmathSoaV3Copy( &result->col0, &invCol0 );
+    vmathSoaV3Copy( &result->col1, &invCol1 );
+    vmathSoaV3Copy( &result->col2, &invCol2 );
+    /* Translation is read after columns 0..2 of result have been stored. */
+    vmathSoaV3ScalarMul( &termX, &invCol0, tfrm->col3.x );
+    vmathSoaV3ScalarMul( &termY, &invCol1, tfrm->col3.y );
+    vmathSoaV3ScalarMul( &termZ, &invCol2, tfrm->col3.z );
+    vmathSoaV3Add( &sumYZ, &termY, &termZ );
+    vmathSoaV3Add( &sumXYZ, &termX, &sumYZ );
+    vmathSoaV3Neg( &negTranslation, &sumXYZ );
+    vmathSoaV3Copy( &result->col3, &negTranslation );
+}
+
+/* Applies vmathSoaV3AbsPerElem to every column of `tfrm`, storing into `result`. */
+static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    VmathSoaVector3 *out0 = &result->col0;
+    VmathSoaVector3 *out1 = &result->col1;
+    VmathSoaVector3 *out2 = &result->col2;
+    VmathSoaVector3 *out3 = &result->col3;
+    vmathSoaV3AbsPerElem( out0, &tfrm->col0 );
+    vmathSoaV3AbsPerElem( out1, &tfrm->col1 );
+    vmathSoaV3AbsPerElem( out2, &tfrm->col2 );
+    vmathSoaV3AbsPerElem( out3, &tfrm->col3 );
+}
+
+/*
+ * Transform * 3-vector product: result = col0*vec.x + col1*vec.y + col2*vec.z.
+ * Column 3 (the translation) does not contribute.
+ * Every output component is accumulated in a local and `result` is written
+ * only at the end, so `result` may be the same object as `vec`.
+ */
+static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec )
+{
+    vec_float4 vx, vy, vz, outX, outY, outZ;
+    vx = vec->x;
+    vy = vec->y;
+    vz = vec->z;
+    /* Terms are summed left to right, column 0 first. */
+    outX = spu_mul( tfrm->col0.x, vx );
+    outX = spu_add( outX, spu_mul( tfrm->col1.x, vy ) );
+    outX = spu_add( outX, spu_mul( tfrm->col2.x, vz ) );
+    outY = spu_mul( tfrm->col0.y, vx );
+    outY = spu_add( outY, spu_mul( tfrm->col1.y, vy ) );
+    outY = spu_add( outY, spu_mul( tfrm->col2.y, vz ) );
+    outZ = spu_mul( tfrm->col0.z, vx );
+    outZ = spu_add( outZ, spu_mul( tfrm->col1.z, vy ) );
+    outZ = spu_add( outZ, spu_mul( tfrm->col2.z, vz ) );
+    vmathSoaV3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt )
+{
+    vec_float4 tmpX, tmpY, tmpZ;
+    tmpX = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.x, pnt->x ), spu_mul( tfrm->col1.x, pnt->y ) ), spu_mul( tfrm->col2.x, pnt->z ) ), tfrm->col3.x );
+    tmpY = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.y, pnt->x ), spu_mul( tfrm->col1.y, pnt->y ) ), spu_mul( tfrm->col2.y, pnt->z ) ), tfrm->col3.y );
+    tmpZ = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.z, pnt->x ), spu_mul( tfrm->col1.z, pnt->y ) ), spu_mul( tfrm->col2.z, pnt->z ) ), tfrm->col3.z );
+    vmathSoaP3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
+{
+    VmathSoaTransform3 tmpResult;
+    VmathSoaPoint3 tmpP3_0, tmpP3_1;
+    vmathSoaT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
+    vmathSoaT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
+    vmathSoaT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
+    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
+    vmathSoaT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
+    vmathSoaV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
+    vmathSoaT3Copy( result, &tmpResult );
+}
+
+static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
+{
+    vmathSoaV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
+    vmathSoaV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
+    vmathSoaV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
+    vmathSoaV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
+}
+
+static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result )
+{
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
+}
+
+static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm )
+{
+    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
+    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
+    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
+}
+
+static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm )
+{
+    vmathSoaM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
+}
+
+static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
+{
+    vmathSoaV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    vmathSoaV3Copy( result, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians )
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), c, s );
+    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( s ), c );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
+}
+
+static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians )
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    vmathSoaV3MakeFromElems( &result->col0, c, spu_splats(0.0f), negatef4( s ) );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeFromElems( &result->col2, s, spu_splats(0.0f), c );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
+}
+
+static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians )
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    vmathSoaV3MakeFromElems( &result->col0, c, s, spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, spu_splats(0.0f) );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
+}
+
+static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ )
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ->x, &sX, &cX );
+    sincosf4( radiansXYZ->y, &sY, &cY );
+    sincosf4( radiansXYZ->z, &sZ, &cZ );
+    tmp0 = spu_mul( cZ, sY );
+    tmp1 = spu_mul( sZ, sY );
+    vmathSoaV3MakeFromElems( &result->col0, spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
+}
+
+static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    VmathSoaMatrix3 tmpM3_0;
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
+    vmathSoaV3MakeFromScalar( &tmpV3_0, spu_splats(0.0f) );
+    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
+}
+
+static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat )
+{
+    VmathSoaMatrix3 tmpM3_0;
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
+    vmathSoaV3MakeFromScalar( &tmpV3_0, spu_splats(0.0f) );
+    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
+}
+
+static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec )
+{
+    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
+}
+
+static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec )
+{
+    vmathSoaV3ScalarMul( &result->col0, &tfrm->col0, vmathSoaV3GetX( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col1, &tfrm->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col2, &tfrm->col2, vmathSoaV3GetZ( scaleVec ) );
+    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm )
+{
+    vmathSoaV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
+    vmathSoaV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
+    vmathSoaV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
+    vmathSoaV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
+}
+
+static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
+{
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 )
+{
+    vmathSoaV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
+    vmathSoaV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
+    vmathSoaV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
+    vmathSoaV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm )
+{
+    VmathTransform3 mat0, mat1, mat2, mat3;
+    vmathSoaT3Get4Aos( tfrm, &mat0, &mat1, &mat2, &mat3 );
+    printf("slot 0:\n");
+    vmathT3Print( &mat0 );
+    printf("slot 1:\n");
+    vmathT3Print( &mat1 );
+    printf("slot 2:\n");
+    vmathT3Print( &mat2 );
+    printf("slot 3:\n");
+    vmathT3Print( &mat3 );
+}
+
+static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name )
+{
+    printf("%s:\n", name);
+    vmathSoaT3Print( tfrm );
+}
+
+#endif
+
+static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *tfrm )
+{
+    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
+    vec_uint4 largestXorY, largestYorZ, largestZorX;
+
+    xx = tfrm->col0.x;
+    yx = tfrm->col0.y;
+    zx = tfrm->col0.z;
+    xy = tfrm->col1.x;
+    yy = tfrm->col1.y;
+    zy = tfrm->col1.z;
+    xz = tfrm->col2.x;
+    yz = tfrm->col2.y;
+    zz = tfrm->col2.z;
+
+    trace = spu_add( spu_add( xx, yy ), zz );
+
+    negTrace = spu_cmpgt( spu_splats(0.0f), trace );
+    ZgtX = spu_cmpgt( zz, xx );
+    ZgtY = spu_cmpgt( zz, yy );
+    YgtX = spu_cmpgt( yy, xx );
+    largestXorY = spu_and( negTrace, spu_nand( ZgtX, ZgtY ) );
+    largestYorZ = spu_and( negTrace, spu_or( YgtX, ZgtX ) );
+    largestZorX = spu_and( negTrace, spu_orc( ZgtY, YgtX ) );
+    
+    zz = spu_sel( zz, negatef4(zz), largestXorY );
+    xy = spu_sel( xy, negatef4(xy), largestXorY );
+    xx = spu_sel( xx, negatef4(xx), largestYorZ );
+    yz = spu_sel( yz, negatef4(yz), largestYorZ );
+    yy = spu_sel( yy, negatef4(yy), largestZorX );
+    zx = spu_sel( zx, negatef4(zx), largestZorX );
+
+    radicand = spu_add( spu_add( spu_add( xx, yy ), zz ), spu_splats(1.0f) );
+    scale = spu_mul( spu_splats(0.5f), rsqrtf4( radicand ) );
+
+    tmpx = spu_mul( spu_sub( zy, yz ), scale );
+    tmpy = spu_mul( spu_sub( xz, zx ), scale );
+    tmpz = spu_mul( spu_sub( yx, xy ), scale );
+    tmpw = spu_mul( radicand, scale );
+    qx = tmpx;
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    qx = spu_sel( qx, tmpw, largestXorY );
+    qy = spu_sel( qy, tmpz, largestXorY );
+    qz = spu_sel( qz, tmpy, largestXorY );
+    qw = spu_sel( qw, tmpx, largestXorY );
+    tmpx = qx;
+    tmpz = qz;
+    qx = spu_sel( qx, qy, largestYorZ );
+    qy = spu_sel( qy, tmpx, largestYorZ );
+    qz = spu_sel( qz, qw, largestYorZ );
+    qw = spu_sel( qw, tmpz, largestYorZ );
+
+    result->x = qx;
+    result->y = qy;
+    result->z = qz;
+    result->w = qw;
+}
+
+static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *tfrm0, const VmathSoaVector3 *tfrm1 )
+{
+    vmathSoaV3ScalarMul( &result->col0, tfrm0, vmathSoaV3GetX( tfrm1 ) );
+    vmathSoaV3ScalarMul( &result->col1, tfrm0, vmathSoaV3GetY( tfrm1 ) );
+    vmathSoaV3ScalarMul( &result->col2, tfrm0, vmathSoaV3GetZ( tfrm1 ) );
+}
+
+static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *tfrm0, const VmathSoaVector4 *tfrm1 )
+{
+    vmathSoaV4ScalarMul( &result->col0, tfrm0, vmathSoaV4GetX( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col1, tfrm0, vmathSoaV4GetY( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col2, tfrm0, vmathSoaV4GetZ( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col3, tfrm0, vmathSoaV4GetW( tfrm1 ) );
+}
+
+static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
+{
+    vec_float4 tmpX, tmpY, tmpZ;
+    tmpX = spu_add( spu_add( spu_mul( vec->x, mat->col0.x ), spu_mul( vec->y, mat->col0.y ) ), spu_mul( vec->z, mat->col0.z ) );
+    tmpY = spu_add( spu_add( spu_mul( vec->x, mat->col1.x ), spu_mul( vec->y, mat->col1.y ) ), spu_mul( vec->z, mat->col1.z ) );
+    tmpZ = spu_add( spu_add( spu_mul( vec->x, mat->col2.x ), spu_mul( vec->y, mat->col2.y ) ), spu_mul( vec->z, mat->col2.z ) );
+    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3MakeFromElems( &result->col0, spu_splats(0.0f), vec->z, negatef4( vec->y ) );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( vec->z ), spu_splats(0.0f), vec->x );
+    vmathSoaV3MakeFromElems( &result->col2, vec->y, negatef4( vec->x ), spu_splats(0.0f) );
+}
+
+static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2;
+    vmathSoaV3Cross( &tmpV3_0, vec, &mat->col0 );
+    vmathSoaV3Cross( &tmpV3_1, vec, &mat->col1 );
+    vmathSoaV3Cross( &tmpV3_2, vec, &mat->col2 );
+    vmathSoaM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa_v.h
index 0b16a9553..c8401e3d0 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa_v.h
@@ -1,1063 +1,1063 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_V_C_H
-#define _VECTORMATH_MAT_SOA_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromAos(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
-    return result;
-}
-
-static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
-{
-    vmathSoaM3Get4Aos(&mat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col0 )
-{
-    vmathSoaM3SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col1 )
-{
-    vmathSoaM3SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col2 )
-{
-    vmathSoaM3SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec )
-{
-    vmathSoaM3SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec )
-{
-    vmathSoaM3SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaM3SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row )
-{
-    return vmathSoaM3GetElem(&mat, col, row);
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat )
-{
-    return vmathSoaM3Determinant(&mat);
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat )
-{
-    vmathSoaM3Print(&mat);
-}
-
-static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name )
-{
-    vmathSoaM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 _col0, VmathSoaVector4 _col1, VmathSoaVector4 _col2, VmathSoaVector4 _col3 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromAos(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
-    return result;
-}
-
-static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
-{
-    vmathSoaM4Get4Aos(&mat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col0 )
-{
-    vmathSoaM4SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col1 )
-{
-    vmathSoaM4SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col2 )
-{
-    vmathSoaM4SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col3 )
-{
-    vmathSoaM4SetCol3(result, &_col3);
-}
-
-static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec )
-{
-    vmathSoaM4SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec )
-{
-    vmathSoaM4SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaM4SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row )
-{
-    return vmathSoaM4GetElem(&mat, col, row);
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat )
-{
-    return vmathSoaM4Determinant(&mat);
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 )
-{
-    vmathSoaM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec )
-{
-    vmathSoaM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat )
-{
-    vmathSoaM4Print(&mat);
-}
-
-static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name )
-{
-    vmathSoaM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2, VmathSoaVector3 _col3 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromAos(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFrom4Aos(&result, &tfrm0, &tfrm1, &tfrm2, &tfrm3);
-    return result;
-}
-
-static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
-{
-    vmathSoaT3Get4Aos(&tfrm, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 _col0 )
-{
-    vmathSoaT3SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 _col1 )
-{
-    vmathSoaT3SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 _col2 )
-{
-    vmathSoaT3SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 _col3 )
-{
-    vmathSoaT3SetCol3(result, &_col3);
-}
-
-static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec )
-{
-    vmathSoaT3SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec )
-{
-    vmathSoaT3SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaT3SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row )
-{
-    return vmathSoaT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row )
-{
-    VmathSoaVector4 result;
-    vmathSoaT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 tfrm )
-{
-    vmathSoaT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec )
-{
-    vmathSoaT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm )
-{
-    vmathSoaT3Print(&tfrm);
-}
-
-static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name )
-{
-    vmathSoaT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 tfrm )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 tfrm0, VmathSoaVector3 tfrm1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 tfrm0, VmathSoaVector4 tfrm1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_V_C_H
+#define _VECTORMATH_MAT_SOA_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromAos(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
+    return result;
+}
+
+static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
+{
+    vmathSoaM3Get4Aos(&mat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col0 )
+{
+    vmathSoaM3SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col1 )
+{
+    vmathSoaM3SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col2 )
+{
+    vmathSoaM3SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec )
+{
+    vmathSoaM3SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec )
+{
+    vmathSoaM3SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
+{
+    vmathSoaM3SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row )
+{
+    return vmathSoaM3GetElem(&mat, col, row);
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat )
+{
+    return vmathSoaM3Determinant(&mat);
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeIdentity(&result);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat )
+{
+    vmathSoaM3Print(&mat);
+}
+
+static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name )
+{
+    vmathSoaM3Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromT3(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 _col0, VmathSoaVector4 _col1, VmathSoaVector4 _col2, VmathSoaVector4 _col3 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromAos(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
+    return result;
+}
+
+static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
+{
+    vmathSoaM4Get4Aos(&mat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col0 )
+{
+    vmathSoaM4SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col1 )
+{
+    vmathSoaM4SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col2 )
+{
+    vmathSoaM4SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col3 )
+{
+    vmathSoaM4SetCol3(result, &_col3);
+}
+
+static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec )
+{
+    vmathSoaM4SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec )
+{
+    vmathSoaM4SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
+{
+    vmathSoaM4SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row )
+{
+    return vmathSoaM4GetElem(&mat, col, row);
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat )
+{
+    return vmathSoaM4Determinant(&mat);
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulP3(&result, &mat, &pnt);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MulT3(&result, &mat, &tfrm1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 )
+{
+    vmathSoaM4SetUpper3x3(result, &mat3);
+}
+
+static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec )
+{
+    vmathSoaM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat )
+{   /* By-value form of vmathSoaM4PrependScale: both inputs are passed on by address, the result returned as a copy. */
+    VmathSoaMatrix4 scaled;
+    vmathSoaM4PrependScale( &scaled, &scaleVec, &mat );
+    return scaled;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec )
+{   /* By-value form of vmathSoaM4MakeTranslation: translateVec is passed on by address, the result returned as a copy. */
+    VmathSoaMatrix4 translation;
+    vmathSoaM4MakeTranslation( &translation, &translateVec );
+    return translation;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec )
+{   /* By-value form of vmathSoaM4MakeLookAt: the three inputs are passed on by address, the result returned as a copy. */
+    VmathSoaMatrix4 view;
+    vmathSoaM4MakeLookAt( &view, &eyePos, &lookAtPos, &upVec );
+    return view;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{   /* By-value form of vmathSoaM4MakePerspective: arguments are forwarded unchanged, the result returned as a copy. */
+    VmathSoaMatrix4 projection;
+    vmathSoaM4MakePerspective( &projection, fovyRadians, aspect, zNear, zFar );
+    return projection;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   /* By-value form of vmathSoaM4MakeFrustum: arguments are forwarded unchanged, the result returned as a copy. */
+    VmathSoaMatrix4 projection;
+    vmathSoaM4MakeFrustum( &projection, left, right, bottom, top, zNear, zFar );
+    return projection;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   /* By-value form of vmathSoaM4MakeOrthographic: arguments are forwarded unchanged, the result returned as a copy. */
+    VmathSoaMatrix4 projection;
+    vmathSoaM4MakeOrthographic( &projection, left, right, bottom, top, zNear, zFar );
+    return projection;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 )
+{   /* By-value form of vmathSoaM4Select: both matrices are passed on by address, select1 unchanged. */
+    VmathSoaMatrix4 chosen;
+    vmathSoaM4Select( &chosen, &mat0, &mat1, select1 );
+    return chosen;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat )
+{   /* Compiled only under _VECTORMATH_DEBUG: hands the address of the by-value copy to vmathSoaM4Print. */
+    vmathSoaM4Print(&mat);
+}
+
+static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name )
+{   /* Compiled only under _VECTORMATH_DEBUG: hands the address of the by-value copy, plus name, to vmathSoaM4Prints. */
+    vmathSoaM4Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar )
+{   /* By-value form of vmathSoaT3MakeFromScalar: the callee fills a local, which is returned as a copy. */
+    VmathSoaTransform3 built;
+    vmathSoaT3MakeFromScalar( &built, scalar );
+    return built;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2, VmathSoaVector3 _col3 )
+{   /* By-value form of vmathSoaT3MakeFromCols: the four columns are passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 built;
+    vmathSoaT3MakeFromCols( &built, &_col0, &_col1, &_col2, &_col3 );
+    return built;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec )
+{   /* By-value form of vmathSoaT3MakeFromM3V3: both inputs are passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 built;
+    vmathSoaT3MakeFromM3V3( &built, &tfrm, &translateVec );
+    return built;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
+{   /* By-value form of vmathSoaT3MakeFromQV3: both inputs are passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 built;
+    vmathSoaT3MakeFromQV3( &built, &unitQuat, &translateVec );
+    return built;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm )
+{   /* By-value form of vmathSoaT3MakeFromAos: the AoS transform is passed on by address, the SoA result returned as a copy. */
+    VmathSoaTransform3 soa;
+    vmathSoaT3MakeFromAos( &soa, &tfrm );
+    return soa;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 )
+{   /* By-value form of vmathSoaT3MakeFrom4Aos: the four AoS transforms are passed on by address, in argument order. */
+    VmathSoaTransform3 soa;
+    vmathSoaT3MakeFrom4Aos( &soa, &tfrm0, &tfrm1, &tfrm2, &tfrm3 );
+    return soa;
+}
+
+static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
+{   /* By-value variant: tfrm's copy is passed by address; the four output pointers go to vmathSoaT3Get4Aos untouched. */
+    vmathSoaT3Get4Aos(&tfrm, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 _col0 )
+{   /* By-value variant: forwards to vmathSoaT3SetCol0 with the address of the local copy of _col0. */
+    vmathSoaT3SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 _col1 )
+{   /* By-value variant: forwards to vmathSoaT3SetCol1 with the address of the local copy of _col1. */
+    vmathSoaT3SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 _col2 )
+{   /* By-value variant: forwards to vmathSoaT3SetCol2 with the address of the local copy of _col2. */
+    vmathSoaT3SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 _col3 )
+{   /* By-value variant: forwards to vmathSoaT3SetCol3 with the address of the local copy of _col3. */
+    vmathSoaT3SetCol3(result, &_col3);
+}
+
+static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec )
+{   /* By-value variant: col is forwarded unchanged and vec's copy is passed by address to vmathSoaT3SetCol. */
+    vmathSoaT3SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec )
+{   /* By-value variant: row is forwarded unchanged and vec's copy (a 4-component vector) is passed by address to vmathSoaT3SetRow. */
+    vmathSoaT3SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
+{   /* Pure pass-through to vmathSoaT3SetElem: every argument is forwarded unchanged. */
+    vmathSoaT3SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row )
+{   /* By-value variant: passes the address of tfrm's copy to vmathSoaT3GetElem and returns its result unchanged. */
+    return vmathSoaT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol0: the callee fills a local, which is returned as a copy. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol0( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol1: the callee fills a local, which is returned as a copy. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol1( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol2: the callee fills a local, which is returned as a copy. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol2( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol3: the callee fills a local, which is returned as a copy. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol3( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col )
+{   /* By-value form of vmathSoaT3GetCol: col is forwarded unchanged, the fetched column returned as a copy. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, &tfrm, col );
+    return column;
+}
+
+static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row )
+{   /* By-value form of vmathSoaT3GetRow: row is forwarded unchanged, the fetched 4-component row returned as a copy. */
+    VmathSoaVector4 rowVec;
+    vmathSoaT3GetRow( &rowVec, &tfrm, row );
+    return rowVec;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3Inverse: the callee fills a local, which is returned as a copy. */
+    VmathSoaTransform3 inverse;
+    vmathSoaT3Inverse( &inverse, &tfrm );
+    return inverse;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3OrthoInverse: the callee fills a local, which is returned as a copy. */
+    VmathSoaTransform3 inverse;
+    vmathSoaT3OrthoInverse( &inverse, &tfrm );
+    return inverse;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3AbsPerElem: the callee fills a local, which is returned as a copy. */
+    VmathSoaTransform3 absolute;
+    vmathSoaT3AbsPerElem( &absolute, &tfrm );
+    return absolute;
+}
+
+static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec )
+{   /* By-value form of vmathSoaT3MulV3: both operands are passed on by address, the result returned as a copy. */
+    VmathSoaVector3 product;
+    vmathSoaT3MulV3( &product, &tfrm, &vec );
+    return product;
+}
+
+static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt )
+{   /* By-value form of vmathSoaT3MulP3: both operands are passed on by address, the result returned as a copy. */
+    VmathSoaPoint3 product;
+    vmathSoaT3MulP3( &product, &tfrm, &pnt );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
+{   /* By-value form of vmathSoaT3Mul: operands are passed on by address in the same order (tfrm0, then tfrm1). */
+    VmathSoaTransform3 product;
+    vmathSoaT3Mul( &product, &tfrm0, &tfrm1 );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
+{   /* By-value form of vmathSoaT3MulPerElem: both operands are passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 product;
+    vmathSoaT3MulPerElem( &product, &tfrm0, &tfrm1 );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( void )
+{   /* By-value form of vmathSoaT3MakeIdentity. (void) makes this a real C prototype; an empty list leaves call arguments unchecked. */
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 tfrm )
+{   /* By-value variant: forwards to vmathSoaT3SetUpper3x3 with the address of the local copy of the 3x3 matrix tfrm. */
+    vmathSoaT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetUpper3x3: the callee fills a local 3x3 matrix, which is returned as a copy. */
+    VmathSoaMatrix3 upper;
+    vmathSoaT3GetUpper3x3( &upper, &tfrm );
+    return upper;
+}
+
+static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec )
+{   /* By-value variant: forwards to vmathSoaT3SetTranslation with the address of the local copy of translateVec. */
+    vmathSoaT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetTranslation: the callee fills a local, which is returned as a copy. */
+    VmathSoaVector3 translation;
+    vmathSoaT3GetTranslation( &translation, &tfrm );
+    return translation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationX: the callee fills a local, which is returned as a copy. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationX( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationY: the callee fills a local, which is returned as a copy. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationY( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationZ: the callee fills a local, which is returned as a copy. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationZ( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{   /* By-value form of vmathSoaT3MakeRotationZYX: radiansXYZ is passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationZYX( &rotation, &radiansXYZ );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{   /* By-value form of vmathSoaT3MakeRotationAxis: unitVec is passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationAxis( &rotation, radians, &unitVec );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat )
+{   /* By-value form of vmathSoaT3MakeRotationQ: unitQuat is passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationQ( &rotation, &unitQuat );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec )
+{   /* By-value form of vmathSoaT3MakeScale: scaleVec is passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 scaling;
+    vmathSoaT3MakeScale( &scaling, &scaleVec );
+    return scaling;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec )
+{   /* By-value form of vmathSoaT3AppendScale: both inputs are passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 scaled;
+    vmathSoaT3AppendScale( &scaled, &tfrm, &scaleVec );
+    return scaled;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3PrependScale: both inputs are passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 scaled;
+    vmathSoaT3PrependScale( &scaled, &scaleVec, &tfrm );
+    return scaled;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec )
+{   /* By-value form of vmathSoaT3MakeTranslation: translateVec is passed on by address, the result returned as a copy. */
+    VmathSoaTransform3 translation;
+    vmathSoaT3MakeTranslation( &translation, &translateVec );
+    return translation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 )
+{   /* By-value form of vmathSoaT3Select: both transforms are passed on by address, select1 unchanged. */
+    VmathSoaTransform3 chosen;
+    vmathSoaT3Select( &chosen, &tfrm0, &tfrm1, select1 );
+    return chosen;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm )
+{   /* Compiled only under _VECTORMATH_DEBUG: hands the address of the by-value copy to vmathSoaT3Print. */
+    vmathSoaT3Print(&tfrm);
+}
+
+static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name )
+{   /* Compiled only under _VECTORMATH_DEBUG: hands the address of the by-value copy, plus name, to vmathSoaT3Prints. */
+    vmathSoaT3Prints(&tfrm, name);
+}
+
+#endif
+
+static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 tfrm )
+{   /* By-value form of vmathSoaQMakeFromM3: the 3x3 matrix is passed on by address, the quaternion returned as a copy. */
+    VmathSoaQuat quat;
+    vmathSoaQMakeFromM3( &quat, &tfrm );
+    return quat;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 tfrm0, VmathSoaVector3 tfrm1 )
+{   /* By-value form of vmathSoaV3Outer: both vectors are passed on by address in order, the 3x3 result returned as a copy. */
+    VmathSoaMatrix3 outer;
+    vmathSoaV3Outer( &outer, &tfrm0, &tfrm1 );
+    return outer;
+}
+
+static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 tfrm0, VmathSoaVector4 tfrm1 )
+{   /* By-value form of vmathSoaV4Outer: both vectors are passed on by address in order, the 4x4 result returned as a copy. */
+    VmathSoaMatrix4 outer;
+    vmathSoaV4Outer( &outer, &tfrm0, &tfrm1 );
+    return outer;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
+{   /* By-value form of vmathSoaV3RowMul: vec and mat are passed on by address in that order, the result returned as a copy. */
+    VmathSoaVector3 product;
+    vmathSoaV3RowMul( &product, &vec, &mat );
+    return product;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec )
+{   /* By-value form of vmathSoaV3CrossMatrix: vec is passed on by address, the 3x3 result returned as a copy. */
+    VmathSoaMatrix3 crossMat;
+    vmathSoaV3CrossMatrix( &crossMat, &vec );
+    return crossMat;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
+{   /* By-value form of vmathSoaV3CrossMatrixMul: vec and mat are passed on by address, the 3x3 result returned as a copy. */
+    VmathSoaMatrix3 product;
+    vmathSoaV3CrossMatrixMul( &product, &vec, &mat );
+    return product;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos.h
index 57ff6ecb1..0f25d654b 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos.h
@@ -1,371 +1,371 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_C_H
-#define _VECTORMATH_QUAT_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
-{
-    result->vec128 = (vec_float4){ _x, _y, _z, _w };
-}
-
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
-{
-    result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
-}
-
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
-{
-    result->vec128 = spu_splats( scalar );
-}
-
-static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathQMakeIdentity( VmathQuat *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0001;
-}
-
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    VmathQuat tmpQ_0, tmpQ_1;
-    vmathQSub( &tmpQ_0, quat1, quat0 );
-    vmathQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
-{
-    VmathQuat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot4( unitQuat0->vec128, unitQuat1->vec128 );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
-    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start.vec128 = spu_sel( unitQuat0->vec128, negatef4( unitQuat0->vec128 ), selectMask );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    result->vec128 = spu_madd( start.vec128, scale0, spu_mul( unitQuat1->vec128, scale1 ) );
-}
-
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
-{
-    VmathQuat tmp0, tmp1;
-    vmathQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &tmp0, &tmp1 );
-}
-
-static inline vec_float4 vmathQGet128( const VmathQuat *quat )
-{
-    return quat->vec128;
-}
-
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
-{
-    result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathQSetX( VmathQuat *result, float _x )
-{
-    result->vec128 = spu_insert( _x, result->vec128, 0 );
-}
-
-static inline float vmathQGetX( const VmathQuat *quat )
-{
-    return spu_extract( quat->vec128, 0 );
-}
-
-static inline void vmathQSetY( VmathQuat *result, float _y )
-{
-    result->vec128 = spu_insert( _y, result->vec128, 1 );
-}
-
-static inline float vmathQGetY( const VmathQuat *quat )
-{
-    return spu_extract( quat->vec128, 1 );
-}
-
-static inline void vmathQSetZ( VmathQuat *result, float _z )
-{
-    result->vec128 = spu_insert( _z, result->vec128, 2 );
-}
-
-static inline float vmathQGetZ( const VmathQuat *quat )
-{
-    return spu_extract( quat->vec128, 2 );
-}
-
-static inline void vmathQSetW( VmathQuat *result, float _w )
-{
-    result->vec128 = spu_insert( _w, result->vec128, 3 );
-}
-
-static inline float vmathQGetW( const VmathQuat *quat )
-{
-    return spu_extract( quat->vec128, 3 );
-}
-
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
-{
-    result->vec128 = spu_insert( value, result->vec128, idx );
-}
-
-static inline float vmathQGetElem( const VmathQuat *quat, int idx )
-{
-    return spu_extract( quat->vec128, idx );
-}
-
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->vec128 = spu_add( quat0->vec128, quat1->vec128 );
-}
-
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->vec128 = spu_sub( quat0->vec128, quat1->vec128 );
-}
-
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->vec128 = spu_mul( quat->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->vec128 = divf4( quat->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = negatef4( quat->vec128 );
-}
-
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    return spu_extract( _vmathVfDot4( quat0->vec128, quat1->vec128 ), 0 );
-}
-
-static inline float vmathQNorm( const VmathQuat *quat )
-{
-    return spu_extract( _vmathVfDot4( quat->vec128, quat->vec128 ), 0 );
-}
-
-static inline float vmathQLength( const VmathQuat *quat )
-{
-    return sqrtf( vmathQNorm( quat ) );
-}
-
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
-{
-    vec_float4 dot = _vmathVfDot4( quat->vec128, quat->vec128 );
-    result->vec128 = spu_mul( quat->vec128, rsqrtf4( dot ) );
-}
-
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    VmathVector3 crossVec, tmpV3_0;
-    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, (vec_uchar16)spu_splats(0x00010203) );
-    cosAngleX2Plus2 = spu_madd( cosAngle, spu_splats(2.0f), spu_splats(2.0f) );
-    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
-    cosHalfAngleX2 = spu_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
-    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    crossVec = tmpV3_0;
-    res = spu_mul( crossVec.vec128, recipCosHalfAngleX2 );
-    res = spu_sel( res, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ), (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_mul( unitVec->vec128, s ), c, (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0xf000) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x0f00) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x00f0) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = quat0->vec128;
-    rdata = quat1->vec128;
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    tmp0 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_YZXW );
-    qv = spu_mul( spu_shuffle( ldata, ldata, shuffle_wwww ), rdata );
-    qv = spu_madd( spu_shuffle( rdata, rdata, shuffle_wwww ), ldata, qv );
-    qv = spu_madd( tmp0, tmp1, qv );
-    qv = spu_nmsub( tmp2, tmp3, qv );
-    product = spu_mul( ldata, rdata );
-    l_wxyz = spu_rlqwbyte( ldata, 12 );
-    r_wxyz = spu_rlqwbyte( rdata, 12 );
-    qw = spu_nmsub( l_wxyz, r_wxyz, product );
-    xy = spu_madd( l_wxyz, r_wxyz, product );
-    qw = spu_sub( qw, spu_rlqwbyte( xy, 8 ) );
-    result->vec128 = spu_sel( qv, qw, (vec_uint4)spu_maskb( 0x000f ) );
-}
-
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
-{
-    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat->vec128;
-    vdata = vec->vec128;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    tmp0 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_YZXW );
-    wwww = spu_shuffle( qdata, qdata, shuffle_wwww );
-    qv = spu_mul( wwww, vdata );
-    qv = spu_madd( tmp0, tmp1, qv );
-    qv = spu_nmsub( tmp2, tmp3, qv );
-    product = spu_mul( qdata, vdata );
-    qw = spu_madd( spu_rlqwbyte( qdata, 4 ), spu_rlqwbyte( vdata, 4 ), product );
-    qw = spu_add( spu_rlqwbyte( product, 8 ), qw );
-    tmp1 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_YZXW );
-    res = spu_mul( spu_shuffle( qw, qw, shuffle_xxxx ), qdata );
-    res = spu_madd( wwww, qv, res );
-    res = spu_madd( tmp0, tmp1, res );
-    res = spu_nmsub( tmp2, tmp3, res );
-    result->vec128 = res;
-}
-
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = spu_xor( quat->vec128, ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) );
-}
-
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
-{
-    result->vec128 = spu_sel( quat0->vec128, quat1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint( const VmathQuat *quat )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat->vec128;
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-static inline void vmathQPrints( const VmathQuat *quat, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat->vec128;
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_C_H
+#define _VECTORMATH_QUAT_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
+{   /* Copies quat's 128-bit vector (vec128) into result. */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
+{   /* Builds the quaternion from four scalars; the vector literal stores them in x, y, z, w order. */
+    result->vec128 = (vec_float4){ _x, _y, _z, _w };
+}
+
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
+{   /* Combines xyz with _w (promoted into element 0 of a vector) by one shuffle; presumably _VECTORMATH_SHUF_XYZA keeps x,y,z and takes that element as w - macro defined elsewhere. */
+    result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
+}
+
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
+{   /* Takes all four components from vec by copying its 128-bit vector unchanged. */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
+{   /* Sets every component of result to scalar (spu_splats replicates it across the vector). */
+    result->vec128 = spu_splats( scalar );
+}
+
+static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 )
+{   /* Stores the caller's raw 128-bit vector as the quaternion, with no conversion. */
+    result->vec128 = vf4;
+}
+
+static inline void vmathQMakeIdentity( VmathQuat *result )
+{   /* Assigns the constant _VECTORMATH_UNIT_0001 (defined elsewhere; presumably (0,0,0,1) - confirm). */
+    result->vec128 = _VECTORMATH_UNIT_0001;
+}
+
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Linear blend: result = quat0 + (quat1 - quat0) * t, built from the Sub / ScalarMul / Add helpers. */
+    VmathQuat delta, scaledDelta;
+    vmathQSub( &delta, quat1, quat0 );
+    vmathQScalarMul( &scaledDelta, &delta, t );
+    vmathQAdd( result, quat0, &scaledDelta );
+}
+
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
+{   /* Blends unitQuat0 toward unitQuat1 by t: sine-ratio weights, or plain (1-t, t) weights, chosen against _VECTORMATH_SLERP_TOL. */
+    VmathQuat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);   /* shuffle pattern: copy word 0 into every word */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);   /* shuffle pattern: copy word 1 into every word */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);   /* shuffle pattern: copy word 2 into every word */
+    cosAngle = _vmathVfDot4( unitQuat0->vec128, unitQuat1->vec128 );   /* dot-product helper defined elsewhere */
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );   /* broadcast word 0 of the dot result */
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );   /* set where the dot product is negative */
+    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );   /* flip the sign so cosAngle is non-negative ... */
+    start.vec128 = spu_sel( unitQuat0->vec128, negatef4( unitQuat0->vec128 ), selectMask );   /* ... and negate unitQuat0 to match */
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );   /* set where cosAngle < tolerance: use sine weights */
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );   /* word 1 <- 1 - t */
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );   /* word 2 <- t; words 0 and 3 stay 1 */
+    angles = spu_mul( angles, angle );   /* ( a, (1-t)*a, t*a, a ) */
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );   /* divide each sine by sin(a) from word 0 */
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );   /* weight for start: sine ratio where masked, else 1 - t */
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );   /* weight for unitQuat1: sine ratio where masked, else t */
+    result->vec128 = spu_madd( start.vec128, scale0, spu_mul( unitQuat1->vec128, scale1 ) );   /* start*scale0 + unitQuat1*scale1 */
+}
+
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
+{   /* Three slerps: (quat0 -> quat3) and (quat1 -> quat2) at t, then a slerp between those two at 2t(1-t). */
+    VmathQuat outerArc, innerArc;
+    vmathQSlerp( &outerArc, t, unitQuat0, unitQuat3 );
+    vmathQSlerp( &innerArc, t, unitQuat1, unitQuat2 );
+    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &outerArc, &innerArc );
+}
+
+static inline vec_float4 vmathQGet128( const VmathQuat *quat )
+{   /* Returns the quaternion's underlying 128-bit vector by value. */
+    return quat->vec128;
+}
+
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
+{   /* Overwrites x, y, z from vec; the 0x000f byte mask selects the last word, so result's existing w is kept. */
+    result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
+{   /* Copies the whole 128-bit value, so the quaternion's w also lands in result's 4th element. */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathQSetX( VmathQuat *result, float _x )
+{   /* Overwrites element 0 (x) of result->vec128 with _x; the other elements are kept. */
+    result->vec128 = spu_insert( _x, result->vec128, 0 );
+}
+
+static inline float vmathQGetX( const VmathQuat *quat )
+{   /* Returns element 0 (x) of quat->vec128. */
+    return spu_extract( quat->vec128, 0 );
+}
+
+static inline void vmathQSetY( VmathQuat *result, float _y )
+{   /* Overwrites element 1 (y) of result->vec128 with _y; the other elements are kept. */
+    result->vec128 = spu_insert( _y, result->vec128, 1 );
+}
+
+static inline float vmathQGetY( const VmathQuat *quat )
+{   /* Returns element 1 (y) of quat->vec128. */
+    return spu_extract( quat->vec128, 1 );
+}
+
+static inline void vmathQSetZ( VmathQuat *result, float _z )
+{   /* Overwrites element 2 (z) of result->vec128 with _z; the other elements are kept. */
+    result->vec128 = spu_insert( _z, result->vec128, 2 );
+}
+
+static inline float vmathQGetZ( const VmathQuat *quat )
+{   /* Returns element 2 (z) of quat->vec128. */
+    return spu_extract( quat->vec128, 2 );
+}
+
+static inline void vmathQSetW( VmathQuat *result, float _w )
+{   /* Overwrites element 3 (w) of result->vec128 with _w; the other elements are kept. */
+    result->vec128 = spu_insert( _w, result->vec128, 3 );
+}
+
+static inline float vmathQGetW( const VmathQuat *quat )
+{   /* Returns element 3 (w) of quat->vec128. */
+    return spu_extract( quat->vec128, 3 );
+}
+
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
+{   /* Overwrites element idx with value. idx is not range-checked here; presumably 0..3 (x,y,z,w). */
+    result->vec128 = spu_insert( value, result->vec128, idx );
+}
+
+static inline float vmathQGetElem( const VmathQuat *quat, int idx )
+{   /* Returns element idx. idx is not range-checked here; presumably 0..3 (x,y,z,w). */
+    return spu_extract( quat->vec128, idx );
+}
+
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Element-wise sum quat0 + quat1. */
+    result->vec128 = spu_add( quat0->vec128, quat1->vec128 );
+}
+
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Element-wise difference quat0 - quat1. */
+    result->vec128 = spu_sub( quat0->vec128, quat1->vec128 );
+}
+
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
+{   /* Multiplies all four elements by scalar (replicated into every lane by spu_splats). */
+    result->vec128 = spu_mul( quat->vec128, spu_splats(scalar) );
+}
+
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
+{   /* Divides all four elements by scalar via divf4; a zero scalar is not guarded against here. */
+    result->vec128 = divf4( quat->vec128, spu_splats(scalar) );
+}
+
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
+{   /* Negates all four elements via negatef4. */
+    result->vec128 = negatef4( quat->vec128 );
+}
+
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Returns element 0 of _vmathVfDot4 (helper defined outside this view; presumably the 4-element dot product). */
+    return spu_extract( _vmathVfDot4( quat0->vec128, quat1->vec128 ), 0 );
+}
+
+static inline float vmathQNorm( const VmathQuat *quat )
+{   /* Dot product of quat with itself, with no square root taken; vmathQLength applies sqrtf to this value. */
+    return spu_extract( _vmathVfDot4( quat->vec128, quat->vec128 ), 0 );
+}
+
+static inline float vmathQLength( const VmathQuat *quat )
+{   /* Returns sqrtf of vmathQNorm(quat). */
+    return sqrtf( vmathQNorm( quat ) );
+}
+
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
+{   /* Scales quat by rsqrtf4 of its self-dot-product; a zero quaternion is not guarded against. */
+    const vec_float4 selfDot = _vmathVfDot4( quat->vec128, quat->vec128 );
+    result->vec128 = spu_mul( quat->vec128, rsqrtf4( selfDot ) );
+}
+
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{   /* xyz = cross(v0,v1) / sqrt(2+2*dot), w = sqrt(2+2*dot) / 2. NOTE(review): dot == -1 feeds 0 to rsqrtf4 - confirm the intended behavior. */
+    VmathVector3 crossVec;
+    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );  /* replicate element 0 into every lane */
+    cosAngleX2Plus2 = spu_madd( cosAngle, spu_splats(2.0f), spu_splats(2.0f) );
+    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
+    cosHalfAngleX2 = spu_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
+    vmathV3Cross( &crossVec, unitVec0, unitVec1 );
+    res = spu_mul( crossVec.vec128, recipCosHalfAngleX2 );
+    res = spu_sel( res, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ), (vec_uint4)spu_maskb(0x000f) );
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
+{
+    /* xyz = unitVec * sin(radians/2); the w lane (byte mask 0x000f) = cos(radians/2). */
+    vec_float4 sinHalf, cosHalf, halfAngle;
+    halfAngle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    result->vec128 = spu_sel( spu_mul( unitVec->vec128, sinHalf ), cosHalf, (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
+{
+    /* Builds ( sin(radians/2), 0, 0, cos(radians/2) ). */
+    vec_float4 sinHalf, cosHalf, halfAngle, quat;
+    halfAngle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    quat = spu_sel( spu_splats(0.0f), sinHalf, (vec_uint4)spu_maskb(0xf000) );
+    result->vec128 = spu_sel( quat, cosHalf, (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
+{
+    /* Builds ( 0, sin(radians/2), 0, cos(radians/2) ). */
+    vec_float4 sinHalf, cosHalf, halfAngle, quat;
+    halfAngle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    quat = spu_sel( spu_splats(0.0f), sinHalf, (vec_uint4)spu_maskb(0x0f00) );
+    result->vec128 = spu_sel( quat, cosHalf, (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
+{
+    /* Builds ( 0, 0, sin(radians/2), cos(radians/2) ). */
+    vec_float4 sinHalf, cosHalf, halfAngle, quat;
+    halfAngle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    quat = spu_sel( spu_splats(0.0f), sinHalf, (vec_uint4)spu_maskb(0x00f0) );
+    result->vec128 = spu_sel( quat, cosHalf, (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Quaternion product quat0 * quat1: x, y, z are taken from qv and w from qw. */
+    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = quat0->vec128;   /* both inputs are read into locals before result is written */
+    rdata = quat1->vec128;
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);   /* bytes 12..15 (w) replicated into every lane */
+    tmp0 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_YZXW );   /* presumably l.(y,z,x,w), going by the macro name (defined elsewhere) */
+    tmp1 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_ZXYW );   /* presumably r.(z,x,y,w) */
+    tmp2 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_ZXYW );   /* presumably l.(z,x,y,w) */
+    tmp3 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_YZXW );   /* presumably r.(y,z,x,w) */
+    qv = spu_mul( spu_shuffle( ldata, ldata, shuffle_wwww ), rdata );   /* l.w * r */
+    qv = spu_madd( spu_shuffle( rdata, rdata, shuffle_wwww ), ldata, qv );   /* + r.w * l */
+    qv = spu_madd( tmp0, tmp1, qv );   /* + tmp0 * tmp1 */
+    qv = spu_nmsub( tmp2, tmp3, qv );   /* - tmp2 * tmp3; with the line above, the cross-product terms */
+    product = spu_mul( ldata, rdata );   /* per-lane l * r */
+    l_wxyz = spu_rlqwbyte( ldata, 12 );   /* rotate left by 12 bytes: (w,x,y,z) */
+    r_wxyz = spu_rlqwbyte( rdata, 12 );
+    qw = spu_nmsub( l_wxyz, r_wxyz, product );   /* last lane: l.w*r.w - l.z*r.z */
+    xy = spu_madd( l_wxyz, r_wxyz, product );   /* lane 1: l.x*r.x + l.y*r.y */
+    qw = spu_sub( qw, spu_rlqwbyte( xy, 8 ) );   /* rotating by 8 bytes moves xy's lane 1 into the last lane */
+    result->vec128 = spu_sel( qv, qw, (vec_uint4)spu_maskb( 0x000f ) );   /* last four bytes (w) from qw, rest from qv */
+}
+
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
+{   /* Applies quat to vec using the expanded q * v * conj(q) form. NOTE(review): presumably expects a unit quaternion - confirm. */
+    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat->vec128;   /* inputs are read into locals before result is written */
+    vdata = vec->vec128;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);   /* bytes 0..3 (x) replicated into every lane */
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);   /* bytes 12..15 (w) replicated into every lane */
+    tmp0 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_YZXW );   /* presumably q.(y,z,x,w), going by the macro name (defined elsewhere) */
+    tmp1 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_ZXYW );   /* presumably v.(z,x,y,w) */
+    tmp2 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_ZXYW );   /* presumably q.(z,x,y,w) */
+    tmp3 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_YZXW );   /* presumably v.(y,z,x,w) */
+    wwww = spu_shuffle( qdata, qdata, shuffle_wwww );
+    qv = spu_mul( wwww, vdata );   /* q.w * v */
+    qv = spu_madd( tmp0, tmp1, qv );   /* + tmp0 * tmp1 */
+    qv = spu_nmsub( tmp2, tmp3, qv );   /* - tmp2 * tmp3; with the line above, the cross-product terms of q and v */
+    product = spu_mul( qdata, vdata );   /* per-lane q * v */
+    qw = spu_madd( spu_rlqwbyte( qdata, 4 ), spu_rlqwbyte( vdata, 4 ), product );   /* lane 0: q.y*v.y + q.x*v.x */
+    qw = spu_add( spu_rlqwbyte( product, 8 ), qw );   /* lane 0: + q.z*v.z */
+    tmp1 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_ZXYW );   /* tmp1 and tmp3 are reused for the shuffles of qv; tmp0 and tmp2 still hold q */
+    tmp3 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_YZXW );
+    res = spu_mul( spu_shuffle( qw, qw, shuffle_xxxx ), qdata );   /* qw lane 0 times q */
+    res = spu_madd( wwww, qv, res );   /* + q.w * qv */
+    res = spu_madd( tmp0, tmp1, res );   /* + tmp0 * tmp1 */
+    res = spu_nmsub( tmp2, tmp3, res );   /* - tmp2 * tmp3; the cross-product terms of q and qv */
+    result->vec128 = res;   /* the 4th lane is stored as computed, with no masking */
+}
+
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
+{   /* XORs the top bit (the float sign bit) of the first three elements; the 4th word of the mask is 0, so w is unchanged. */
+    result->vec128 = spu_xor( quat->vec128, ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) );
+}
+
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
+{   /* Yields quat1 when select1 is non-zero, else quat0: -(select1 > 0) is an all-ones or all-zeros mask word. */
+    result->vec128 = spu_sel( quat0->vec128, quat1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint( const VmathQuat *quat )
+{   /* Debug helper: prints the four elements in storage order via printf. */
+    union { vec_float4 v; float s[4]; } lanes;
+    lanes.v = quat->vec128;
+    printf( "( %f %f %f %f )\n", lanes.s[0], lanes.s[1], lanes.s[2], lanes.s[3] );
+}
+
+static inline void vmathQPrints( const VmathQuat *quat, const char *name )
+{   /* Debug helper: prints name, then the four elements in storage order via printf. */
+    union { vec_float4 v; float s[4]; } lanes;
+    lanes.v = quat->vec128;
+    printf( "%s: ( %f %f %f %f )\n", name, lanes.s[0], lanes.s[1], lanes.s[2], lanes.s[3] );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos_v.h
index cc519d805..04cf6ccf9 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos_v.h
@@ -1,312 +1,312 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_V_C_H
-#define _VECTORMATH_QUAT_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
-{
-    VmathQuat result;
-    vmathQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
-{
-    VmathQuat result;
-    vmathQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 )
-{
-    VmathQuat result;
-    vmathQMakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeIdentity_V( )
-{
-    VmathQuat result;
-    vmathQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
-{
-    VmathQuat result;
-    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
-{
-    VmathQuat result;
-    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline vec_float4 vmathQGet128_V( VmathQuat quat )
-{
-    return vmathQGet128(&quat);
-}
-
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
-{
-    vmathQSetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
-{
-    VmathVector3 result;
-    vmathQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathQSetX_V( VmathQuat *result, float _x )
-{
-    vmathQSetX(result, _x);
-}
-
-static inline float vmathQGetX_V( VmathQuat quat )
-{
-    return vmathQGetX(&quat);
-}
-
-static inline void vmathQSetY_V( VmathQuat *result, float _y )
-{
-    vmathQSetY(result, _y);
-}
-
-static inline float vmathQGetY_V( VmathQuat quat )
-{
-    return vmathQGetY(&quat);
-}
-
-static inline void vmathQSetZ_V( VmathQuat *result, float _z )
-{
-    vmathQSetZ(result, _z);
-}
-
-static inline float vmathQGetZ_V( VmathQuat quat )
-{
-    return vmathQGetZ(&quat);
-}
-
-static inline void vmathQSetW_V( VmathQuat *result, float _w )
-{
-    vmathQSetW(result, _w);
-}
-
-static inline float vmathQGetW_V( VmathQuat quat )
-{
-    return vmathQGetW(&quat);
-}
-
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
-{
-    vmathQSetElem(result, idx, value);
-}
-
-static inline float vmathQGetElem_V( VmathQuat quat, int idx )
-{
-    return vmathQGetElem(&quat, idx);
-}
-
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQNeg_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNeg(&result, &quat);
-    return result;
-}
-
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    return vmathQDot(&quat0, &quat1);
-}
-
-static inline float vmathQNorm_V( VmathQuat quat )
-{
-    return vmathQNorm(&quat);
-}
-
-static inline float vmathQLength_V( VmathQuat quat )
-{
-    return vmathQLength(&quat);
-}
-
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathQuat result;
-    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathQuat result;
-    vmathQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationX_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationY_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationZ_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQConj_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
-{
-    VmathQuat result;
-    vmathQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint_V( VmathQuat quat )
-{
-    vmathQPrint(&quat);
-}
-
-static inline void vmathQPrints_V( VmathQuat quat, const char *name )
-{
-    vmathQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_V_C_H
+#define _VECTORMATH_QUAT_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
+{   /* By-value form of vmathQMakeFromElems: fills a local and returns it. */
+    VmathQuat out;
+    vmathQMakeFromElems( &out, _x, _y, _z, _w );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{   /* By-value form of vmathQMakeFromV3Scalar: passes the address of the xyz copy. */
+    VmathQuat out;
+    vmathQMakeFromV3Scalar( &out, &xyz, _w );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
+{   /* By-value form of vmathQMakeFromV4: passes the address of the vec copy. */
+    VmathQuat out;
+    vmathQMakeFromV4( &out, &vec );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
+{   /* By-value form of vmathQMakeFromScalar: fills a local and returns it. */
+    VmathQuat out;
+    vmathQMakeFromScalar( &out, scalar );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 )
+{   /* By-value form of vmathQMakeFrom128: fills a local and returns it. */
+    VmathQuat out;
+    vmathQMakeFrom128( &out, vf4 );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeIdentity_V( void )
+{   /* By-value form of vmathQMakeIdentity. "( void )" makes this a real C prototype, so stray arguments are diagnosed. */
+    VmathQuat result;
+    vmathQMakeIdentity(&result);
+    return result;
+}
+
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
+{   /* By-value form of vmathQLerp: passes the addresses of the argument copies. */
+    VmathQuat out;
+    vmathQLerp( &out, t, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
+{   /* By-value form of vmathQSlerp: passes the addresses of the argument copies. */
+    VmathQuat out;
+    vmathQSlerp( &out, t, &unitQuat0, &unitQuat1 );
+    return out;
+}
+
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
+{   /* By-value form of vmathQSquad: passes the addresses of the argument copies. */
+    VmathQuat out;
+    vmathQSquad( &out, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3 );
+    return out;
+}
+
+static inline vec_float4 vmathQGet128_V( VmathQuat quat )
+{   /* By-value form of vmathQGet128. */
+    return vmathQGet128(&quat);
+}
+
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
+{   /* Forwards to vmathQSetXYZ; result is still modified through the pointer, only vec is passed by value. */
+    vmathQSetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
+{   /* By-value form of vmathQGetXYZ: fills a local vector and returns it. */
+    VmathVector3 out;
+    vmathQGetXYZ( &out, &quat );
+    return out;
+}
+
+static inline void vmathQSetX_V( VmathQuat *result, float _x )
+{   /* Forwards unchanged to vmathQSetX. */
+    vmathQSetX(result, _x);
+}
+
+static inline float vmathQGetX_V( VmathQuat quat )
+{   /* By-value form of vmathQGetX. */
+    return vmathQGetX(&quat);
+}
+
+static inline void vmathQSetY_V( VmathQuat *result, float _y )
+{   /* Forwards unchanged to vmathQSetY. */
+    vmathQSetY(result, _y);
+}
+
+static inline float vmathQGetY_V( VmathQuat quat )
+{   /* By-value form of vmathQGetY. */
+    return vmathQGetY(&quat);
+}
+
+static inline void vmathQSetZ_V( VmathQuat *result, float _z )
+{   /* Forwards unchanged to vmathQSetZ. */
+    vmathQSetZ(result, _z);
+}
+
+static inline float vmathQGetZ_V( VmathQuat quat )
+{   /* By-value form of vmathQGetZ. */
+    return vmathQGetZ(&quat);
+}
+
+static inline void vmathQSetW_V( VmathQuat *result, float _w )
+{   /* Forwards unchanged to vmathQSetW. */
+    vmathQSetW(result, _w);
+}
+
+static inline float vmathQGetW_V( VmathQuat quat )
+{   /* By-value form of vmathQGetW. */
+    return vmathQGetW(&quat);
+}
+
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
+{   /* Forwards unchanged to vmathQSetElem; idx is not checked here. */
+    vmathQSetElem(result, idx, value);
+}
+
+static inline float vmathQGetElem_V( VmathQuat quat, int idx )
+{   /* By-value form of vmathQGetElem; idx is not checked here. */
+    return vmathQGetElem(&quat, idx);
+}
+
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
+{   /* By-value form of vmathQAdd. */
+    VmathQuat out;
+    vmathQAdd( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
+{   /* By-value form of vmathQSub. */
+    VmathQuat out;
+    vmathQSub( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
+{   /* By-value form of vmathQScalarMul. */
+    VmathQuat out;
+    vmathQScalarMul( &out, &quat, scalar );
+    return out;
+}
+
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
+{   /* By-value form of vmathQScalarDiv. */
+    VmathQuat out;
+    vmathQScalarDiv( &out, &quat, scalar );
+    return out;
+}
+
+static inline VmathQuat vmathQNeg_V( VmathQuat quat )
+{   /* By-value form of vmathQNeg. */
+    VmathQuat out;
+    vmathQNeg( &out, &quat );
+    return out;
+}
+
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
+{   /* By-value form of vmathQDot. */
+    return vmathQDot(&quat0, &quat1);
+}
+
+static inline float vmathQNorm_V( VmathQuat quat )
+{   /* By-value form of vmathQNorm. */
+    return vmathQNorm(&quat);
+}
+
+static inline float vmathQLength_V( VmathQuat quat )
+{   /* By-value form of vmathQLength. */
+    return vmathQLength(&quat);
+}
+
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
+{   /* By-value form of vmathQNormalize. */
+    VmathQuat out;
+    vmathQNormalize( &out, &quat );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{   /* By-value form of vmathQMakeRotationArc. */
+    VmathQuat out;
+    vmathQMakeRotationArc( &out, &unitVec0, &unitVec1 );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{   /* By-value form of vmathQMakeRotationAxis. */
+    VmathQuat out;
+    vmathQMakeRotationAxis( &out, radians, &unitVec );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationX_V( float radians )
+{   /* By-value form of vmathQMakeRotationX. */
+    VmathQuat out;
+    vmathQMakeRotationX( &out, radians );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationY_V( float radians )
+{   /* By-value form of vmathQMakeRotationY. */
+    VmathQuat out;
+    vmathQMakeRotationY( &out, radians );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationZ_V( float radians )
+{   /* By-value form of vmathQMakeRotationZ. */
+    VmathQuat out;
+    vmathQMakeRotationZ( &out, radians );
+    return out;
+}
+
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
+{   /* By-value form of vmathQMul; operand order is passed through unchanged. */
+    VmathQuat out;
+    vmathQMul( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
+{   /* By-value form of vmathQRotate: fills a local vector and returns it. */
+    VmathVector3 out;
+    vmathQRotate( &out, &quat, &vec );
+    return out;
+}
+
+static inline VmathQuat vmathQConj_V( VmathQuat quat )
+{   /* By-value form of vmathQConj. */
+    VmathQuat out;
+    vmathQConj( &out, &quat );
+    return out;
+}
+
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
+{   /* By-value form of vmathQSelect. */
+    VmathQuat out;
+    vmathQSelect( &out, &quat0, &quat1, select1 );
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint_V( VmathQuat quat )
+{   /* By-value form of vmathQPrint; only compiled when _VECTORMATH_DEBUG is defined. */
+    vmathQPrint(&quat);
+}
+
+static inline void vmathQPrints_V( VmathQuat quat, const char *name )
+{   /* By-value form of vmathQPrints; only compiled when _VECTORMATH_DEBUG is defined. */
+    vmathQPrints(&quat, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa.h
index d133092f3..cd79e9d8d 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa.h
@@ -1,419 +1,419 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_C_H
-#define _VECTORMATH_QUAT_SOA_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 _w )
-{
-    vmathSoaQSetXYZ( result, xyz );
-    vmathSoaQSetW( result, _w );
-}
-
-static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_float4 vec128 = quat->vec128;
-    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
-    result->w = spu_shuffle( vec128, vec128, shuffle_wwww );
-}
-
-static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( quat0->vec128, quat2->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( quat1->vec128, quat3->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( quat0->vec128, quat2->vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( quat1->vec128, quat3->vec128, _VECTORMATH_SHUF_ZCWD );
-    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    result->w = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-}
-
-static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result )
-{
-    vmathSoaQMakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    VmathSoaQuat tmpQ_0, tmpQ_1;
-    vmathSoaQSub( &tmpQ_0, quat1, quat0 );
-    vmathSoaQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathSoaQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 )
-{
-    VmathSoaQuat start, tmpQ_0, tmpQ_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaQDot( unitQuat0, unitQuat1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
-    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    vmathSoaQSetX( &start, spu_sel( unitQuat0->x, negatef4( unitQuat0->x ), selectMask ) );
-    vmathSoaQSetY( &start, spu_sel( unitQuat0->y, negatef4( unitQuat0->y ), selectMask ) );
-    vmathSoaQSetZ( &start, spu_sel( unitQuat0->z, negatef4( unitQuat0->z ), selectMask ) );
-    vmathSoaQSetW( &start, spu_sel( unitQuat0->w, negatef4( unitQuat0->w ), selectMask ) );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    vmathSoaQScalarMul( &tmpQ_0, &start, scale0 );
-    vmathSoaQScalarMul( &tmpQ_1, unitQuat1, scale1 );
-    vmathSoaQAdd( result, &tmpQ_0, &tmpQ_1 );
-}
-
-static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 )
-{
-    VmathSoaQuat tmp0, tmp1;
-    vmathSoaQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathSoaQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathSoaQSlerp( result, spu_mul( spu_mul( spu_splats(2.0f), t ), spu_sub( spu_splats(1.0f), t ) ), &tmp0, &tmp1 );
-}
-
-static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( quat->x, quat->z, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( quat->y, quat->w, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( quat->x, quat->z, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( quat->y, quat->w, _VECTORMATH_SHUF_ZCWD );
-    vmathQMakeFrom128( result0, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
-    vmathQMakeFrom128( result1, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
-    vmathQMakeFrom128( result2, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
-    vmathQMakeFrom128( result3, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
-}
-
-static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat )
-{
-    vmathSoaV3MakeFromElems( result, quat->x, quat->y, quat->z );
-}
-
-static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat )
-{
-    return quat->x;
-}
-
-static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat )
-{
-    return quat->y;
-}
-
-static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat )
-{
-    return quat->z;
-}
-
-static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 _w )
-{
-    result->w = _w;
-}
-
-static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat )
-{
-    return quat->w;
-}
-
-static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx )
-{
-    return *(&quat->x + idx);
-}
-
-static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    result->x = spu_add( quat0->x, quat1->x );
-    result->y = spu_add( quat0->y, quat1->y );
-    result->z = spu_add( quat0->z, quat1->z );
-    result->w = spu_add( quat0->w, quat1->w );
-}
-
-static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    result->x = spu_sub( quat0->x, quat1->x );
-    result->y = spu_sub( quat0->y, quat1->y );
-    result->z = spu_sub( quat0->z, quat1->z );
-    result->w = spu_sub( quat0->w, quat1->w );
-}
-
-static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
-{
-    result->x = spu_mul( quat->x, scalar );
-    result->y = spu_mul( quat->y, scalar );
-    result->z = spu_mul( quat->z, scalar );
-    result->w = spu_mul( quat->w, scalar );
-}
-
-static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
-{
-    result->x = divf4( quat->x, scalar );
-    result->y = divf4( quat->y, scalar );
-    result->z = divf4( quat->z, scalar );
-    result->w = divf4( quat->w, scalar );
-}
-
-static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    result->x = negatef4( quat->x );
-    result->y = negatef4( quat->y );
-    result->z = negatef4( quat->z );
-    result->w = negatef4( quat->w );
-}
-
-static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    vec_float4 result;
-    result = spu_mul( quat0->x, quat1->x );
-    result = spu_add( result, spu_mul( quat0->y, quat1->y ) );
-    result = spu_add( result, spu_mul( quat0->z, quat1->z ) );
-    result = spu_add( result, spu_mul( quat0->w, quat1->w ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat )
-{
-    vec_float4 result;
-    result = spu_mul( quat->x, quat->x );
-    result = spu_add( result, spu_mul( quat->y, quat->y ) );
-    result = spu_add( result, spu_mul( quat->z, quat->z ) );
-    result = spu_add( result, spu_mul( quat->w, quat->w ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat )
-{
-    return sqrtf4( vmathSoaQNorm( quat ) );
-}
-
-static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaQNorm( quat );
-    lenInv = rsqrtf4( lenSqr );
-    result->x = spu_mul( quat->x, lenInv );
-    result->y = spu_mul( quat->y, lenInv );
-    result->z = spu_mul( quat->z, lenInv );
-    result->w = spu_mul( quat->w, lenInv );
-}
-
-static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf4( spu_mul( spu_splats(2.0f), spu_add( spu_splats(1.0f), vmathSoaV3Dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = recipf4( cosHalfAngleX2 );
-    vmathSoaV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, recipCosHalfAngleX2 );
-    vmathSoaQMakeFromV3Scalar( result, &tmpV3_1, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ) );
-}
-
-static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    VmathSoaVector3 tmpV3_0;
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    vmathSoaV3ScalarMul( &tmpV3_0, unitVec, s );
-    vmathSoaQMakeFromV3Scalar( result, &tmpV3_0, c );
-}
-
-static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, s, spu_splats(0.0f), spu_splats(0.0f), c );
-}
-
-static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, spu_splats(0.0f), s, spu_splats(0.0f), c );
-}
-
-static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), s, c );
-}
-
-static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->x ), spu_mul( quat0->x, quat1->w ) ), spu_mul( quat0->y, quat1->z ) ), spu_mul( quat0->z, quat1->y ) );
-    tmpY = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->y ), spu_mul( quat0->y, quat1->w ) ), spu_mul( quat0->z, quat1->x ) ), spu_mul( quat0->x, quat1->z ) );
-    tmpZ = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->z ), spu_mul( quat0->z, quat1->w ) ), spu_mul( quat0->x, quat1->y ) ), spu_mul( quat0->y, quat1->x ) );
-    tmpW = spu_sub( spu_sub( spu_sub( spu_mul( quat0->w, quat1->w ), spu_mul( quat0->x, quat1->x ) ), spu_mul( quat0->y, quat1->y ) ), spu_mul( quat0->z, quat1->z ) );
-    vmathSoaQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *quat, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = spu_sub( spu_add( spu_mul( quat->w, vec->x ), spu_mul( quat->y, vec->z ) ), spu_mul( quat->z, vec->y ) );
-    tmpY = spu_sub( spu_add( spu_mul( quat->w, vec->y ), spu_mul( quat->z, vec->x ) ), spu_mul( quat->x, vec->z ) );
-    tmpZ = spu_sub( spu_add( spu_mul( quat->w, vec->z ), spu_mul( quat->x, vec->y ) ), spu_mul( quat->y, vec->x ) );
-    tmpW = spu_add( spu_add( spu_mul( quat->x, vec->x ), spu_mul( quat->y, vec->y ) ), spu_mul( quat->z, vec->z ) );
-    result->x = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->x ), spu_mul( tmpX, quat->w ) ), spu_mul( tmpY, quat->z ) ), spu_mul( tmpZ, quat->y ) );
-    result->y = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->y ), spu_mul( tmpY, quat->w ) ), spu_mul( tmpZ, quat->x ) ), spu_mul( tmpX, quat->z ) );
-    result->z = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->z ), spu_mul( tmpZ, quat->w ) ), spu_mul( tmpX, quat->y ) ), spu_mul( tmpY, quat->x ) );
-}
-
-static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    vmathSoaQMakeFromElems( result, negatef4( quat->x ), negatef4( quat->y ), negatef4( quat->z ), quat->w );
-}
-
-static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 )
-{
-    result->x = spu_sel( quat0->x, quat1->x, select1 );
-    result->y = spu_sel( quat0->y, quat1->y, select1 );
-    result->z = spu_sel( quat0->z, quat1->z, select1 );
-    result->w = spu_sel( quat0->w, quat1->w, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaQPrint( const VmathSoaQuat *quat )
-{
-    VmathQuat vec0, vec1, vec2, vec3;
-    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathQPrint( &vec0 );
-    printf("slot 1:\n");
-    vmathQPrint( &vec1 );
-    printf("slot 2:\n");
-    vmathQPrint( &vec2 );
-    printf("slot 3:\n");
-    vmathQPrint( &vec3 );
-}
-
-static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name )
-{
-    VmathQuat vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathQPrint( &vec0 );
-    printf("slot 1:\n");
-    vmathQPrint( &vec1 );
-    printf("slot 2:\n");
-    vmathQPrint( &vec2 );
-    printf("slot 3:\n");
-    vmathQPrint( &vec3 );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_C_H
+#define _VECTORMATH_QUAT_SOA_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 _w )
+{
+    vmathSoaQSetXYZ( result, xyz );
+    vmathSoaQSetW( result, _w );
+}
+
+static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat )
+{
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
+    vec_float4 vec128 = quat->vec128;
+    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
+    result->w = spu_shuffle( vec128, vec128, shuffle_wwww );
+}
+
+static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( quat0->vec128, quat2->vec128, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( quat1->vec128, quat3->vec128, _VECTORMATH_SHUF_XAYB );
+    tmp2 = spu_shuffle( quat0->vec128, quat2->vec128, _VECTORMATH_SHUF_ZCWD );
+    tmp3 = spu_shuffle( quat1->vec128, quat3->vec128, _VECTORMATH_SHUF_ZCWD );
+    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
+    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
+    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
+    result->w = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
+}
+
+static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result )
+{
+    vmathSoaQMakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    VmathSoaQuat tmpQ_0, tmpQ_1;
+    vmathSoaQSub( &tmpQ_0, quat1, quat0 );
+    vmathSoaQScalarMul( &tmpQ_1, &tmpQ_0, t );
+    vmathSoaQAdd( result, quat0, &tmpQ_1 );
+}
+
+static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 )
+{
+    VmathSoaQuat start, tmpQ_0, tmpQ_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaQDot( unitQuat0, unitQuat1 );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
+    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
+    vmathSoaQSetX( &start, spu_sel( unitQuat0->x, negatef4( unitQuat0->x ), selectMask ) );
+    vmathSoaQSetY( &start, spu_sel( unitQuat0->y, negatef4( unitQuat0->y ), selectMask ) );
+    vmathSoaQSetZ( &start, spu_sel( unitQuat0->z, negatef4( unitQuat0->z ), selectMask ) );
+    vmathSoaQSetW( &start, spu_sel( unitQuat0->w, negatef4( unitQuat0->w ), selectMask ) );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = recipf4( sinf4( angle ) );
+    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
+    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
+    vmathSoaQScalarMul( &tmpQ_0, &start, scale0 );
+    vmathSoaQScalarMul( &tmpQ_1, unitQuat1, scale1 );
+    vmathSoaQAdd( result, &tmpQ_0, &tmpQ_1 );
+}
+
+static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 )
+{
+    VmathSoaQuat tmp0, tmp1;
+    vmathSoaQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
+    vmathSoaQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
+    vmathSoaQSlerp( result, spu_mul( spu_mul( spu_splats(2.0f), t ), spu_sub( spu_splats(1.0f), t ) ), &tmp0, &tmp1 );
+}
+
+static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( quat->x, quat->z, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( quat->y, quat->w, _VECTORMATH_SHUF_XAYB );
+    tmp2 = spu_shuffle( quat->x, quat->z, _VECTORMATH_SHUF_ZCWD );
+    tmp3 = spu_shuffle( quat->y, quat->w, _VECTORMATH_SHUF_ZCWD );
+    vmathQMakeFrom128( result0, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
+    vmathQMakeFrom128( result1, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
+    vmathQMakeFrom128( result2, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
+    vmathQMakeFrom128( result3, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
+}
+
+static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat )
+{
+    vmathSoaV3MakeFromElems( result, quat->x, quat->y, quat->z );
+}
+
+static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 _x )
+{
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat )
+{
+    return quat->x;
+}
+
+static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 _y )
+{
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat )
+{
+    return quat->y;
+}
+
+static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 _z )
+{
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat )
+{
+    return quat->z;
+}
+
+static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 _w )
+{
+    result->w = _w;
+}
+
+static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat )
+{
+    return quat->w;
+}
+
+static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value )
+{
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx )
+{
+    return *(&quat->x + idx);
+}
+
+static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    result->x = spu_add( quat0->x, quat1->x );
+    result->y = spu_add( quat0->y, quat1->y );
+    result->z = spu_add( quat0->z, quat1->z );
+    result->w = spu_add( quat0->w, quat1->w );
+}
+
+static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    result->x = spu_sub( quat0->x, quat1->x );
+    result->y = spu_sub( quat0->y, quat1->y );
+    result->z = spu_sub( quat0->z, quat1->z );
+    result->w = spu_sub( quat0->w, quat1->w );
+}
+
+static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
+{
+    result->x = spu_mul( quat->x, scalar );
+    result->y = spu_mul( quat->y, scalar );
+    result->z = spu_mul( quat->z, scalar );
+    result->w = spu_mul( quat->w, scalar );
+}
+
+static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
+{
+    result->x = divf4( quat->x, scalar );
+    result->y = divf4( quat->y, scalar );
+    result->z = divf4( quat->z, scalar );
+    result->w = divf4( quat->w, scalar );
+}
+
+static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{
+    result->x = negatef4( quat->x );
+    result->y = negatef4( quat->y );
+    result->z = negatef4( quat->z );
+    result->w = negatef4( quat->w );
+}
+
+static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    vec_float4 result;
+    result = spu_mul( quat0->x, quat1->x );
+    result = spu_add( result, spu_mul( quat0->y, quat1->y ) );
+    result = spu_add( result, spu_mul( quat0->z, quat1->z ) );
+    result = spu_add( result, spu_mul( quat0->w, quat1->w ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat )
+{
+    vec_float4 result;
+    result = spu_mul( quat->x, quat->x );
+    result = spu_add( result, spu_mul( quat->y, quat->y ) );
+    result = spu_add( result, spu_mul( quat->z, quat->z ) );
+    result = spu_add( result, spu_mul( quat->w, quat->w ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat )
+{
+    return sqrtf4( vmathSoaQNorm( quat ) );
+}
+
+static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{
+    vec_float4 lenSqr, lenInv;
+    lenSqr = vmathSoaQNorm( quat );
+    lenInv = rsqrtf4( lenSqr );
+    result->x = spu_mul( quat->x, lenInv );
+    result->y = spu_mul( quat->y, lenInv );
+    result->z = spu_mul( quat->z, lenInv );
+    result->w = spu_mul( quat->w, lenInv );
+}
+
+static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
+    cosHalfAngleX2 = sqrtf4( spu_mul( spu_splats(2.0f), spu_add( spu_splats(1.0f), vmathSoaV3Dot( unitVec0, unitVec1 ) ) ) );
+    recipCosHalfAngleX2 = recipf4( cosHalfAngleX2 );
+    vmathSoaV3Cross( &tmpV3_0, unitVec0, unitVec1 );
+    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, recipCosHalfAngleX2 );
+    vmathSoaQMakeFromV3Scalar( result, &tmpV3_1, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ) );
+}
+
+static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    VmathSoaVector3 tmpV3_0;
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) );
+    sincosf4( angle, &s, &c );
+    vmathSoaV3ScalarMul( &tmpV3_0, unitVec, s );
+    vmathSoaQMakeFromV3Scalar( result, &tmpV3_0, c );
+}
+
+static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) );
+    sincosf4( angle, &s, &c );
+    vmathSoaQMakeFromElems( result, s, spu_splats(0.0f), spu_splats(0.0f), c );
+}
+
+static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) );
+    sincosf4( angle, &s, &c );
+    vmathSoaQMakeFromElems( result, spu_splats(0.0f), s, spu_splats(0.0f), c );
+}
+
+static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) );
+    sincosf4( angle, &s, &c );
+    vmathSoaQMakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), s, c );
+}
+
+static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->x ), spu_mul( quat0->x, quat1->w ) ), spu_mul( quat0->y, quat1->z ) ), spu_mul( quat0->z, quat1->y ) );
+    tmpY = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->y ), spu_mul( quat0->y, quat1->w ) ), spu_mul( quat0->z, quat1->x ) ), spu_mul( quat0->x, quat1->z ) );
+    tmpZ = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->z ), spu_mul( quat0->z, quat1->w ) ), spu_mul( quat0->x, quat1->y ) ), spu_mul( quat0->y, quat1->x ) );
+    tmpW = spu_sub( spu_sub( spu_sub( spu_mul( quat0->w, quat1->w ), spu_mul( quat0->x, quat1->x ) ), spu_mul( quat0->y, quat1->y ) ), spu_mul( quat0->z, quat1->z ) );
+    vmathSoaQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
+}
+
+static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *quat, const VmathSoaVector3 *vec )
+{
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = spu_sub( spu_add( spu_mul( quat->w, vec->x ), spu_mul( quat->y, vec->z ) ), spu_mul( quat->z, vec->y ) );
+    tmpY = spu_sub( spu_add( spu_mul( quat->w, vec->y ), spu_mul( quat->z, vec->x ) ), spu_mul( quat->x, vec->z ) );
+    tmpZ = spu_sub( spu_add( spu_mul( quat->w, vec->z ), spu_mul( quat->x, vec->y ) ), spu_mul( quat->y, vec->x ) );
+    tmpW = spu_add( spu_add( spu_mul( quat->x, vec->x ), spu_mul( quat->y, vec->y ) ), spu_mul( quat->z, vec->z ) );
+    result->x = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->x ), spu_mul( tmpX, quat->w ) ), spu_mul( tmpY, quat->z ) ), spu_mul( tmpZ, quat->y ) );
+    result->y = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->y ), spu_mul( tmpY, quat->w ) ), spu_mul( tmpZ, quat->x ) ), spu_mul( tmpX, quat->z ) );
+    result->z = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->z ), spu_mul( tmpZ, quat->w ) ), spu_mul( tmpX, quat->y ) ), spu_mul( tmpY, quat->x ) );
+}
+
+static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{
+    vmathSoaQMakeFromElems( result, negatef4( quat->x ), negatef4( quat->y ), negatef4( quat->z ), quat->w );
+}
+
+static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 )
+{
+    result->x = spu_sel( quat0->x, quat1->x, select1 );
+    result->y = spu_sel( quat0->y, quat1->y, select1 );
+    result->z = spu_sel( quat0->z, quat1->z, select1 );
+    result->w = spu_sel( quat0->w, quat1->w, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaQPrint( const VmathSoaQuat *quat )
+{
+    VmathQuat vec0, vec1, vec2, vec3;
+    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathQPrint( &vec0 );
+    printf("slot 1:\n");
+    vmathQPrint( &vec1 );
+    printf("slot 2:\n");
+    vmathQPrint( &vec2 );
+    printf("slot 3:\n");
+    vmathQPrint( &vec3 );
+}
+
+static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name )
+{
+    VmathQuat vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathQPrint( &vec0 );
+    printf("slot 1:\n");
+    vmathQPrint( &vec1 );
+    printf("slot 2:\n");
+    vmathQPrint( &vec2 );
+    printf("slot 3:\n");
+    vmathQPrint( &vec3 );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa_v.h
index f51b43809..601d9da92 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa_v.h
@@ -1,319 +1,319 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_V_C_H
-#define _VECTORMATH_QUAT_SOA_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromAos(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFrom4Aos(&result, &quat0, &quat1, &quat2, &quat3);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeIdentity_V( )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
-{
-    vmathSoaQGet4Aos(&quat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec )
-{
-    vmathSoaQSetXYZ(result, &vec);
-}
-
-static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat )
-{
-    VmathSoaVector3 result;
-    vmathSoaQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 _x )
-{
-    vmathSoaQSetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetX(&quat);
-}
-
-static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 _y )
-{
-    vmathSoaQSetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetY(&quat);
-}
-
-static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 _z )
-{
-    vmathSoaQSetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetZ(&quat);
-}
-
-static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 _w )
-{
-    vmathSoaQSetW(result, _w);
-}
-
-static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetW(&quat);
-}
-
-static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value )
-{
-    vmathSoaQSetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx )
-{
-    return vmathSoaQGetElem(&quat, idx);
-}
-
-static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQNeg(&result, &quat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    return vmathSoaQDot(&quat0, &quat1);
-}
-
-static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat )
-{
-    return vmathSoaQNorm(&quat);
-}
-
-static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat )
-{
-    return vmathSoaQLength(&quat);
-}
-
-static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat quat, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaQPrint_V( VmathSoaQuat quat )
-{
-    vmathSoaQPrint(&quat);
-}
-
-static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name )
-{
-    vmathSoaQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_V_C_H
+#define _VECTORMATH_QUAT_SOA_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{ /* By-value wrapper: has vmathSoaQMakeFromElems fill a local from _x, _y, _z, _w and returns that local by value. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
+{ /* By-value wrapper: forwards the address of the xyz copy plus _w to vmathSoaQMakeFromV3Scalar and returns the result. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec )
+{ /* By-value wrapper: forwards the address of the vec copy to vmathSoaQMakeFromV4 and returns the result by value. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromV4(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar )
+{ /* By-value wrapper: has vmathSoaQMakeFromScalar fill a local from scalar and returns that local by value. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat )
+{ /* By-value wrapper: forwards the address of the single AoS quat copy to vmathSoaQMakeFromAos and returns the SoA result. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromAos(&result, &quat);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 )
+{ /* By-value wrapper: forwards the addresses of the four AoS quaternion copies to vmathSoaQMakeFrom4Aos and returns the SoA result. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFrom4Aos(&result, &quat0, &quat1, &quat2, &quat3);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeIdentity_V( void ) /* explicit void: a real prototype in C, so stray arguments are diagnosed */
+{ /* By-value wrapper: has vmathSoaQMakeIdentity fill a local and returns that local by value. Takes no arguments. */
+    VmathSoaQuat result;
+    vmathSoaQMakeIdentity(&result);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{ /* By-value wrapper: forwards t and the addresses of the quat0/quat1 copies to vmathSoaQLerp and returns the result. */
+    VmathSoaQuat result;
+    vmathSoaQLerp(&result, t, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 )
+{ /* By-value wrapper around vmathSoaQSlerp. NOTE(review): the names suggest unit-length inputs are expected; nothing here checks that. */
+    VmathSoaQuat result;
+    vmathSoaQSlerp(&result, t, &unitQuat0, &unitQuat1);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 )
+{ /* By-value wrapper around vmathSoaQSquad; the four quaternion copies are passed by address in the order given. */
+    VmathSoaQuat result;
+    vmathSoaQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
+    return result;
+}
+
+static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
+{ /* Takes quat by value and forwards its address, with the four output pointers unchanged, to vmathSoaQGet4Aos. */
+    vmathSoaQGet4Aos(&quat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec )
+{ /* Takes vec by value and forwards its address to vmathSoaQSetXYZ; result is still modified through the pointer. */
+    vmathSoaQSetXYZ(result, &vec);
+}
+
+static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat )
+{ /* By-value wrapper: has vmathSoaQGetXYZ fill a local VmathSoaVector3 from the quat copy and returns that local. */
+    VmathSoaVector3 result;
+    vmathSoaQGetXYZ(&result, &quat);
+    return result;
+}
+
+static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 _x )
+{ /* Forwards both arguments unchanged to vmathSoaQSetX; presumably kept so the _V API has a matching name. */
+    vmathSoaQSetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat )
+{ /* Takes quat by value and returns whatever vmathSoaQGetX yields for the address of that copy. */
+    return vmathSoaQGetX(&quat);
+}
+
+static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 _y )
+{ /* Forwards both arguments unchanged to vmathSoaQSetY; presumably kept so the _V API has a matching name. */
+    vmathSoaQSetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat )
+{ /* Takes quat by value and returns whatever vmathSoaQGetY yields for the address of that copy. */
+    return vmathSoaQGetY(&quat);
+}
+
+static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 _z )
+{ /* Forwards both arguments unchanged to vmathSoaQSetZ; presumably kept so the _V API has a matching name. */
+    vmathSoaQSetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat )
+{ /* Takes quat by value and returns whatever vmathSoaQGetZ yields for the address of that copy. */
+    return vmathSoaQGetZ(&quat);
+}
+
+static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 _w )
+{ /* Forwards both arguments unchanged to vmathSoaQSetW; presumably kept so the _V API has a matching name. */
+    vmathSoaQSetW(result, _w);
+}
+
+static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat )
+{ /* Takes quat by value and returns whatever vmathSoaQGetW yields for the address of that copy. */
+    return vmathSoaQGetW(&quat);
+}
+
+static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value )
+{ /* Forwards all three arguments unchanged to vmathSoaQSetElem; idx is not range-checked here. */
+    vmathSoaQSetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx )
+{ /* Takes quat by value and returns vmathSoaQGetElem for that copy; idx is not range-checked here. */
+    return vmathSoaQGetElem(&quat, idx);
+}
+
+static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{ /* By-value wrapper: forwards the addresses of the quat0/quat1 copies to vmathSoaQAdd and returns the result. */
+    VmathSoaQuat result;
+    vmathSoaQAdd(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{ /* By-value wrapper: forwards the addresses of the quat0/quat1 copies (in that order) to vmathSoaQSub and returns the result. */
+    VmathSoaQuat result;
+    vmathSoaQSub(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar )
+{ /* By-value wrapper: forwards the address of the quat copy plus scalar to vmathSoaQScalarMul and returns the result. */
+    VmathSoaQuat result;
+    vmathSoaQScalarMul(&result, &quat, scalar);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar )
+{ /* By-value wrapper: forwards the address of the quat copy plus scalar to vmathSoaQScalarDiv; no zero check is made here. */
+    VmathSoaQuat result;
+    vmathSoaQScalarDiv(&result, &quat, scalar);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat )
+{ /* By-value wrapper: forwards the address of the quat copy to vmathSoaQNeg and returns the result by value. */
+    VmathSoaQuat result;
+    vmathSoaQNeg(&result, &quat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{ /* Takes both quaternions by value and returns vmathSoaQDot of the addresses of those copies. */
+    return vmathSoaQDot(&quat0, &quat1);
+}
+
+static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat )
+{ /* Takes quat by value and returns vmathSoaQNorm for the address of that copy. */
+    return vmathSoaQNorm(&quat);
+}
+
+static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat )
+{ /* Takes quat by value and returns vmathSoaQLength for the address of that copy. */
+    return vmathSoaQLength(&quat);
+}
+
+static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat )
+{ /* By-value wrapper: forwards the address of the quat copy to vmathSoaQNormalize and returns the result by value. */
+    VmathSoaQuat result;
+    vmathSoaQNormalize(&result, &quat);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
+{ /* By-value wrapper around vmathSoaQMakeRotationArc. NOTE(review): the names suggest unit vectors are expected; not checked here. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationArc(&result, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{ /* By-value wrapper around vmathSoaQMakeRotationAxis; radians is forwarded as-is, unitVec by address of the copy. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians )
+{ /* By-value wrapper: has vmathSoaQMakeRotationX fill a local from radians and returns that local by value. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians )
+{ /* By-value wrapper: has vmathSoaQMakeRotationY fill a local from radians and returns that local by value. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians )
+{ /* By-value wrapper: has vmathSoaQMakeRotationZ fill a local from radians and returns that local by value. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{ /* By-value wrapper: forwards the addresses of the quat0/quat1 copies (in that order) to vmathSoaQMul and returns the result. */
+    VmathSoaQuat result;
+    vmathSoaQMul(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat quat, VmathSoaVector3 vec )
+{ /* By-value wrapper: forwards the addresses of the quat and vec copies to vmathSoaQRotate and returns the resulting vector. */
+    VmathSoaVector3 result;
+    vmathSoaQRotate(&result, &quat, &vec);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat )
+{ /* By-value wrapper: forwards the address of the quat copy to vmathSoaQConj and returns the result by value. */
+    VmathSoaQuat result;
+    vmathSoaQConj(&result, &quat);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 )
+{ /* By-value wrapper: forwards the addresses of the quat0/quat1 copies and the mask select1 to vmathSoaQSelect. */
+    VmathSoaQuat result;
+    vmathSoaQSelect(&result, &quat0, &quat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaQPrint_V( VmathSoaQuat quat )
+{ /* Debug helper (compiled only under _VECTORMATH_DEBUG): takes quat by value and forwards its address to vmathSoaQPrint. */
+    vmathSoaQPrint(&quat);
+}
+
+static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name )
+{ /* Debug helper (compiled only under _VECTORMATH_DEBUG): forwards the address of the quat copy, plus name, to vmathSoaQPrints. */
+    vmathSoaQPrints(&quat, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h
index 332e0db95..715f27df7 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h
@@ -1,1029 +1,1029 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_C_H
-#define _VECTORMATH_VEC_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_add( spu_rlqwbyte( result, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
-    result = spu_mul( tmp0, tmp1 );
-    result = spu_nmsub( tmp2, tmp3, result );
-    return result;
-}
-
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
-{
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
-    const vec_uint4 mergeMant = spu_splats(0x000003ffu);
-    const vec_uint4 mergeSign = spu_splats(0x00008000u);
-
-    sign = spu_rlmask((vec_uint4)v, -16);
-    mant = spu_rlmask((vec_uint4)v, -13);
-    bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
-
-    notZero = spu_cmpgt(bexp, 112);
-    isInf = spu_cmpgt(bexp, 142);
-
-    bexp = spu_add(bexp, -112);
-    bexp = spu_sl(bexp, 10);
-
-    hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
-    hfloat = spu_sel(hfloat, hfloatInf, isInf);
-    hfloat = spu_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
-{
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
-}
-
-#endif
-
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
-{
-    result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
-}
-
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-}
-
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
-{
-    result->vec128 = spu_splats( scalar );
-}
-
-static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathV3MakeXAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_1000;
-}
-
-static inline void vmathV3MakeYAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0100;
-}
-
-static inline void vmathV3MakeZAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0010;
-}
-
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
-}
-
-static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
-{
-    return vec->vec128;
-}
-
-static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
-    dstVec = spu_sel(vec->vec128, dstVec, mask);
-    *quad = dstVec;
-}
-
-static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
-    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
-    xyz3 = spu_rlqwbyte( zxyz, 4 );
-    vec0->vec128 = xyzx;
-    vec1->vec128 = xyz1;
-    vec2->vec128 = xyz2;
-    vec3->vec128 = xyz3;
-}
-
-static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = spu_shuffle( vec0->vec128, vec1->vec128, _VECTORMATH_SHUF_XYZA );
-    yzxy = spu_shuffle( vec1->vec128, vec2->vec128, _VECTORMATH_SHUF_YZAB );
-    zxyz = spu_shuffle( vec2->vec128, vec3->vec128, _VECTORMATH_SHUF_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathV3SetX( VmathVector3 *result, float _x )
-{
-    result->vec128 = spu_insert( _x, result->vec128, 0 );
-}
-
-static inline float vmathV3GetX( const VmathVector3 *vec )
-{
-    return spu_extract( vec->vec128, 0 );
-}
-
-static inline void vmathV3SetY( VmathVector3 *result, float _y )
-{
-    result->vec128 = spu_insert( _y, result->vec128, 1 );
-}
-
-static inline float vmathV3GetY( const VmathVector3 *vec )
-{
-    return spu_extract( vec->vec128, 1 );
-}
-
-static inline void vmathV3SetZ( VmathVector3 *result, float _z )
-{
-    result->vec128 = spu_insert( _z, result->vec128, 2 );
-}
-
-static inline float vmathV3GetZ( const VmathVector3 *vec )
-{
-    return spu_extract( vec->vec128, 2 );
-}
-
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
-{
-    result->vec128 = spu_insert( value, result->vec128, idx );
-}
-
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
-{
-    return spu_extract( vec->vec128, idx );
-}
-
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = spu_add( vec->vec128, pnt1->vec128 );
-}
-
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = negatef4( vec->vec128 );
-}
-
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = recipf4( vec->vec128 );
-}
-
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = sqrtf4( vec->vec128 );
-}
-
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = rsqrtf4( vec->vec128 );
-}
-
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = fabsf4( vec->vec128 );
-}
-
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV3MaxElem( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV3MinElem( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline float vmathV3Sum( const VmathVector3 *vec )
-{
-    return
-        spu_extract( vec->vec128, 0 ) +
-        spu_extract( vec->vec128, 1 ) +
-        spu_extract( vec->vec128, 2 );
-}
-
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    return spu_extract( _vmathVfDot3( vec0->vec128, vec1->vec128 ), 0 );
-}
-
-static inline float vmathV3LengthSqr( const VmathVector3 *vec )
-{
-    return spu_extract( _vmathVfDot3( vec->vec128, vec->vec128 ), 0 );
-}
-
-static inline float vmathV3Length( const VmathVector3 *vec )
-{
-    return sqrtf( vmathV3LengthSqr( vec ) );
-}
-
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
-{
-    vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
-    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
-    result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
-}
-
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
-{
-    result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print( const VmathVector3 *vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
-{
-    result->vec128 = (vec_float4){ _x, _y, _z, _w };
-}
-
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
-{
-    result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
-}
-
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->vec128 = spu_sel( vec->vec128, spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = spu_sel( pnt->vec128, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
-{
-    result->vec128 = spu_splats( scalar );
-}
-
-static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathV4MakeXAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_1000;
-}
-
-static inline void vmathV4MakeYAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0100;
-}
-
-static inline void vmathV4MakeZAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0010;
-}
-
-static inline void vmathV4MakeWAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0001;
-}
-
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    VmathVector4 tmpV4_0, tmpV4_1;
-    vmathV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
-}
-
-static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
-{
-    return vec->vec128;
-}
-
-static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);
-}
-
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV4SetX( VmathVector4 *result, float _x )
-{
-    result->vec128 = spu_insert( _x, result->vec128, 0 );
-}
-
-static inline float vmathV4GetX( const VmathVector4 *vec )
-{
-    return spu_extract( vec->vec128, 0 );
-}
-
-static inline void vmathV4SetY( VmathVector4 *result, float _y )
-{
-    result->vec128 = spu_insert( _y, result->vec128, 1 );
-}
-
-static inline float vmathV4GetY( const VmathVector4 *vec )
-{
-    return spu_extract( vec->vec128, 1 );
-}
-
-static inline void vmathV4SetZ( VmathVector4 *result, float _z )
-{
-    result->vec128 = spu_insert( _z, result->vec128, 2 );
-}
-
-static inline float vmathV4GetZ( const VmathVector4 *vec )
-{
-    return spu_extract( vec->vec128, 2 );
-}
-
-static inline void vmathV4SetW( VmathVector4 *result, float _w )
-{
-    result->vec128 = spu_insert( _w, result->vec128, 3 );
-}
-
-static inline float vmathV4GetW( const VmathVector4 *vec )
-{
-    return spu_extract( vec->vec128, 3 );
-}
-
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
-{
-    result->vec128 = spu_insert( value, result->vec128, idx );
-}
-
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
-{
-    return spu_extract( vec->vec128, idx );
-}
-
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = negatef4( vec->vec128 );
-}
-
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = recipf4( vec->vec128 );
-}
-
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = sqrtf4( vec->vec128 );
-}
-
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = rsqrtf4( vec->vec128 );
-}
-
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = fabsf4( vec->vec128 );
-}
-
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV4MaxElem( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV4MinElem( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline float vmathV4Sum( const VmathVector4 *vec )
-{
-    return
-        spu_extract( vec->vec128, 0 ) +
-        spu_extract( vec->vec128, 1 ) +
-        spu_extract( vec->vec128, 2 ) +
-        spu_extract( vec->vec128, 3 );
-}
-
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    return spu_extract( _vmathVfDot4( vec0->vec128, vec1->vec128 ), 0 );
-}
-
-static inline float vmathV4LengthSqr( const VmathVector4 *vec )
-{
-    return spu_extract( _vmathVfDot4( vec->vec128, vec->vec128 ), 0 );
-}
-
-static inline float vmathV4Length( const VmathVector4 *vec )
-{
-    return sqrtf( vmathV4LengthSqr( vec ) );
-}
-
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
-{
-    vec_float4 dot = _vmathVfDot4( vec->vec128, vec->vec128 );
-    result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
-}
-
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
-{
-    result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print( const VmathVector4 *vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-}
-
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
-{
-    result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
-}
-
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
-{
-    result->vec128 = spu_splats( scalar );
-}
-
-static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
-{
-    return pnt->vec128;
-}
-
-static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
-    dstVec = spu_sel(pnt->vec128, dstVec, mask);
-    *quad = dstVec;
-}
-
-static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
-    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
-    xyz3 = spu_rlqwbyte( zxyz, 4 );
-    pnt0->vec128 = xyzx;
-    pnt1->vec128 = xyz1;
-    pnt2->vec128 = xyz2;
-    pnt3->vec128 = xyz3;
-}
-
-static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = spu_shuffle( pnt0->vec128, pnt1->vec128, _VECTORMATH_SHUF_XYZA );
-    yzxy = spu_shuffle( pnt1->vec128, pnt2->vec128, _VECTORMATH_SHUF_YZAB );
-    zxyz = spu_shuffle( pnt2->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathP3SetX( VmathPoint3 *result, float _x )
-{
-    result->vec128 = spu_insert( _x, result->vec128, 0 );
-}
-
-static inline float vmathP3GetX( const VmathPoint3 *pnt )
-{
-    return spu_extract( pnt->vec128, 0 );
-}
-
-static inline void vmathP3SetY( VmathPoint3 *result, float _y )
-{
-    result->vec128 = spu_insert( _y, result->vec128, 1 );
-}
-
-static inline float vmathP3GetY( const VmathPoint3 *pnt )
-{
-    return spu_extract( pnt->vec128, 1 );
-}
-
-static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
-{
-    result->vec128 = spu_insert( _z, result->vec128, 2 );
-}
-
-static inline float vmathP3GetZ( const VmathPoint3 *pnt )
-{
-    return spu_extract( pnt->vec128, 2 );
-}
-
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
-{
-    result->vec128 = spu_insert( value, result->vec128, idx );
-}
-
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
-{
-    return spu_extract( pnt->vec128, idx );
-}
-
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = spu_sub( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_add( pnt->vec128, vec1->vec128 );
-}
-
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_sub( pnt->vec128, vec1->vec128 );
-}
-
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = spu_mul( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = recipf4( pnt->vec128 );
-}
-
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = sqrtf4( pnt->vec128 );
-}
-
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = rsqrtf4( pnt->vec128 );
-}
-
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = fabsf4( pnt->vec128 );
-}
-
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
-    result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline float vmathP3MinElem( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
-    result = fminf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline float vmathP3Sum( const VmathPoint3 *pnt )
-{
-    return
-        spu_extract( pnt->vec128, 0 ) +
-        spu_extract( pnt->vec128, 1 ) +
-        spu_extract( pnt->vec128, 2 );
-}
-
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
-{
-    return spu_extract( _vmathVfDot3( pnt->vec128, unitVec->vec128 ), 0 );
-}
-
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
-{
-    result->vec128 = spu_sel( pnt0->vec128, pnt1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print( const VmathPoint3 *pnt )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt->vec128;
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt->vec128;
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_C_H
+#define _VECTORMATH_VEC_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
+#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
+#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 );
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
+    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
+}
+
+static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 );
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
+    return spu_add( spu_rlqwbyte( result, 8 ), result );
+}
+
+static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
+    tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
+    tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
+    tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
+    tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
+    result = spu_mul( tmp0, tmp1 );
+    result = spu_nmsub( tmp2, tmp3, result );
+    return result;
+}
+
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
+{
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
+    const vec_uint4 mergeMant = spu_splats(0x000003ffu);
+    const vec_uint4 mergeSign = spu_splats(0x00008000u);
+
+    sign = spu_rlmask((vec_uint4)v, -16);
+    mant = spu_rlmask((vec_uint4)v, -13);
+    bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
+
+    notZero = spu_cmpgt(bexp, 112);
+    isInf = spu_cmpgt(bexp, 142);
+
+    bexp = spu_add(bexp, -112);
+    bexp = spu_sl(bexp, 10);
+
+    hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
+    hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
+    hfloat = spu_sel(hfloat, hfloatInf, isInf);
+    hfloat = spu_sel(hfloat, sign, mergeSign);
+
+    return hfloat;
+}
+
+static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
+{
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
+}
+
+#endif
+
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
+{
+    result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
+}
+
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = pnt->vec128;
+}
+
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
+{
+    result->vec128 = spu_splats( scalar );
+}
+
+static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
+{
+    result->vec128 = vf4;
+}
+
+static inline void vmathV3MakeXAxis( VmathVector3 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_1000;
+}
+
+static inline void vmathV3MakeYAxis( VmathVector3 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0100;
+}
+
+static inline void vmathV3MakeZAxis( VmathVector3 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0010;
+}
+
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    VmathVector3 tmpV3_0, tmpV3_1;
+    vmathV3Sub( &tmpV3_0, vec1, vec0 );
+    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathV3Add( result, vec0, &tmpV3_1 );
+}
+
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{   /* Spherical interpolation: result = unitVec0*scale0 + unitVec1*scale1; inputs are not normalized here. */
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);   /* byte pattern picking word 0 for every lane */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);   /* word 1 for every lane */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);   /* word 2 for every lane */
+    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );   /* broadcast lane 0 of the dot product */
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );   /* all-ones where tolerance > cosAngle */
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );   /* lane 1 <- 1-t */
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );   /* lane 2 <- t, giving { 1, 1-t, t, 1 } */
+    angles = spu_mul( angles, angle );   /* { a, (1-t)a, t*a, a } */
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );   /* every sine divided by sin(a) from lane 0 */
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );   /* sin((1-t)a)/sin(a) where mask set, else plain 1-t */
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );   /* sin(t*a)/sin(a) where mask set, else plain t */
+    result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
+}
+
+static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
+{   /* Return the vector's underlying quadword. */
+    return vec->vec128;   /* all four lanes are returned as stored */
+}
+
+static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
+{   /* Write x, y, z into *quad while keeping the fourth word already stored there. */
+    const vec_uint4 keepW = (vec_uint4)spu_maskb(0x000f);   /* bytes 12..15 set: word 3 comes from the old value */
+    const vec_float4 previous = *quad;
+    const vec_float4 merged = spu_sel( vec->vec128, previous, keepW );
+    *quad = merged;
+}
+
+static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
+{   /* Unpack four 3-D vectors from three consecutive quadwords (12 floats). */
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;   /* local names describe the expected lane contents */
+    xyzx = threeQuads[0];   /* per the name: x0 y0 z0 x1 */
+    yzxy = threeQuads[1];   /* y1 z1 x2 y2 */
+    zxyz = threeQuads[2];   /* z2 x3 y3 z3 */
+    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );   /* shuffle constant defined elsewhere; presumably yields x1 y1 z1 x2 -- confirm */
+    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );   /* presumably yields x2 y2 z2 x3 -- confirm */
+    xyz3 = spu_rlqwbyte( zxyz, 4 );   /* rotate left by 4 bytes (one word): x3 y3 z3 z2 */
+    vec0->vec128 = xyzx;   /* fourth lane of each output holds a neighbouring float; it is not cleared */
+    vec1->vec128 = xyz1;
+    vec2->vec128 = xyz2;
+    vec3->vec128 = xyz3;
+}
+
+static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
+{   /* Pack four 3-D vectors into three consecutive quadwords (12 floats). */
+    vec_float4 xyzx, yzxy, zxyz;   /* local names describe the intended lane contents of each output */
+    xyzx = spu_shuffle( vec0->vec128, vec1->vec128, _VECTORMATH_SHUF_XYZA );   /* shuffle constants defined elsewhere; presumably x0 y0 z0 x1 -- confirm */
+    yzxy = spu_shuffle( vec1->vec128, vec2->vec128, _VECTORMATH_SHUF_YZAB );   /* presumably y1 z1 x2 y2 */
+    zxyz = spu_shuffle( vec2->vec128, vec3->vec128, _VECTORMATH_SHUF_ZABC );   /* presumably z2 x3 y3 z3 */
+    threeQuads[0] = xyzx;   /* outputs are written only after all inputs were read */
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
+{   /* Pack eight 3-D vectors as xyz triples, then convert the six float quadwords pairwise via _vmath2VfToHalfFloats. */
+    vec_float4 firstFour[3];    /* packed xyz of vec0..vec3 */
+    vec_float4 lastFour[3];     /* packed xyz of vec4..vec7 */
+    vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, firstFour );
+    vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, lastFour );
+    threeQuads[0] = _vmath2VfToHalfFloats(firstFour[0], firstFour[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(firstFour[2], lastFour[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(lastFour[1], lastFour[2]);
+}
+
+static inline void vmathV3SetX( VmathVector3 *result, float _x )
+{   /* Replace the x component. */
+    result->vec128 = spu_insert( _x, result->vec128, 0 );   /* element 0 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathV3GetX( const VmathVector3 *vec )
+{   /* Return the x component. */
+    return spu_extract( vec->vec128, 0 );   /* element 0 */
+}
+
+static inline void vmathV3SetY( VmathVector3 *result, float _y )
+{   /* Replace the y component. */
+    result->vec128 = spu_insert( _y, result->vec128, 1 );   /* element 1 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathV3GetY( const VmathVector3 *vec )
+{   /* Return the y component. */
+    return spu_extract( vec->vec128, 1 );   /* element 1 */
+}
+
+static inline void vmathV3SetZ( VmathVector3 *result, float _z )
+{   /* Replace the z component. */
+    result->vec128 = spu_insert( _z, result->vec128, 2 );   /* element 2 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathV3GetZ( const VmathVector3 *vec )
+{   /* Return the z component. */
+    return spu_extract( vec->vec128, 2 );   /* element 2 */
+}
+
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
+{   /* Replace the component selected by idx. */
+    result->vec128 = spu_insert( value, result->vec128, idx );   /* idx is handed to spu_insert as-is; no range check is made here */
+}
+
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
+{   /* Return the component selected by idx. */
+    return spu_extract( vec->vec128, idx );   /* idx is handed to spu_extract as-is; no range check is made here */
+}
+
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* result = vec0 + vec1. */
+    result->vec128 = spu_add( vec0->vec128, vec1->vec128 );   /* lane-wise add over the whole quadword */
+}
+
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* result = vec0 - vec1. */
+    result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );   /* lane-wise subtract over the whole quadword */
+}
+
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
+{   /* Add a vector to a point; the result is stored as a point. */
+    result->vec128 = spu_add( vec->vec128, pnt1->vec128 );   /* lane-wise vec + pnt1 */
+}
+
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{   /* result = vec * scalar. */
+    result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );   /* scalar is replicated to every lane, then multiplied lane-wise */
+}
+
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{   /* result = vec / scalar. */
+    result->vec128 = divf4( vec->vec128, spu_splats(scalar) );   /* divf4 against the replicated scalar; a zero scalar is not checked here */
+}
+
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
+{   /* result = -vec. */
+    result->vec128 = negatef4( vec->vec128 );   /* negatef4 is applied to the whole quadword */
+}
+
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* Component-wise product of vec0 and vec1. */
+    result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );   /* lane-wise multiply over the whole quadword */
+}
+
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* Component-wise quotient vec0 / vec1. */
+    result->vec128 = divf4( vec0->vec128, vec1->vec128 );   /* divf4 runs on all four lanes, so the fourth lanes are divided too */
+}
+
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{   /* Component-wise reciprocal of vec. */
+    result->vec128 = recipf4( vec->vec128 );   /* computed by recipf4 on the whole quadword */
+}
+
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{   /* Component-wise square root of vec. */
+    result->vec128 = sqrtf4( vec->vec128 );   /* computed by sqrtf4 on the whole quadword */
+}
+
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{   /* Component-wise reciprocal square root of vec. */
+    result->vec128 = rsqrtf4( vec->vec128 );   /* computed by rsqrtf4 on the whole quadword */
+}
+
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{   /* Component-wise absolute value of vec. */
+    result->vec128 = fabsf4( vec->vec128 );   /* computed by fabsf4 on the whole quadword */
+}
+
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* Component-wise copysign of vec0 and vec1. */
+    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );   /* presumably magnitude from vec0 and sign from vec1, as with C copysign -- confirm against copysignf4 */
+}
+
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* Component-wise maximum of vec0 and vec1. */
+    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );   /* computed by fmaxf4 on the whole quadword */
+}
+
+static inline float vmathV3MaxElem( const VmathVector3 *vec )
+{   /* Return the largest of the x, y and z components. */
+    const vec_float4 yInLane0 = spu_promote( spu_extract( vec->vec128, 1 ), 0 );
+    const vec_float4 zInLane0 = spu_promote( spu_extract( vec->vec128, 2 ), 0 );
+    const vec_float4 best = fmaxf4( zInLane0, fmaxf4( yInLane0, vec->vec128 ) );   /* only lane 0 is meaningful: max(z, max(y, x)) */
+    return spu_extract( best, 0 );
+}
+
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* Component-wise minimum of vec0 and vec1. */
+    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );   /* computed by fminf4 on the whole quadword */
+}
+
+static inline float vmathV3MinElem( const VmathVector3 *vec )
+{   /* Return the smallest of the x, y and z components. */
+    const vec_float4 yInLane0 = spu_promote( spu_extract( vec->vec128, 1 ), 0 );
+    const vec_float4 zInLane0 = spu_promote( spu_extract( vec->vec128, 2 ), 0 );
+    const vec_float4 least = fminf4( zInLane0, fminf4( yInLane0, vec->vec128 ) );   /* only lane 0 is meaningful: min(z, min(y, x)) */
+    return spu_extract( least, 0 );
+}
+
+static inline float vmathV3Sum( const VmathVector3 *vec )
+{   /* Add up the x, y and z components. */
+    const float x = spu_extract( vec->vec128, 0 );
+    const float y = spu_extract( vec->vec128, 1 );
+    const float z = spu_extract( vec->vec128, 2 );
+    return x + y + z;   /* evaluated left to right: (x + y) + z */
+}
+
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* Dot product of vec0 and vec1 as a scalar. */
+    return spu_extract( _vmathVfDot3( vec0->vec128, vec1->vec128 ), 0 );   /* helper is defined elsewhere; lane 0 of its result is returned */
+}
+
+static inline float vmathV3LengthSqr( const VmathVector3 *vec )
+{   /* Squared length: the dot product of vec with itself. */
+    return spu_extract( _vmathVfDot3( vec->vec128, vec->vec128 ), 0 );   /* lane 0 of the helper's result is returned */
+}
+
+static inline float vmathV3Length( const VmathVector3 *vec )
+{   /* Length of vec. */
+    return sqrtf( vmathV3LengthSqr( vec ) );   /* scalar sqrtf of the squared length */
+}
+
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
+{   /* Scale vec by the reciprocal square root of its squared length; a zero-length input is not special-cased. */
+    const vec_uchar16 lane0ToAll = (vec_uchar16)spu_splats(0x00010203);   /* shuffle pattern broadcasting word 0 */
+    const vec_float4 lenSqr = _vmathVfDot3( vec->vec128, vec->vec128 );
+    result->vec128 = spu_mul( vec->vec128, rsqrtf4( spu_shuffle( lenSqr, lenSqr, lane0ToAll ) ) );
+}
+
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* Cross product of vec0 and vec1, in that operand order. */
+    result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );   /* computed by the helper _vmathVfCross, defined elsewhere */
+}
+
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
+{   /* Choose vec1 when select1 is non-zero, otherwise vec0. */
+    result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );   /* -(select1 > 0) is all-ones or zero, so the choice is a bit-select, not a branch */
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print( const VmathVector3 *vec )
+{   /* Debug helper: print x, y and z on one line. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = vec->vec128;   /* lanes are read back through the float-array member */
+    printf( "( %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2] );
+}
+
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
+{   /* Debug helper: print name followed by x, y and z on one line. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = vec->vec128;   /* lanes are read back through the float-array member */
+    printf( "%s: ( %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2] );
+}
+
+#endif
+
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Copy vec into result. */
+    result->vec128 = vec->vec128;   /* single quadword assignment */
+}
+
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
+{   /* Build a 4-D vector from four scalar components. */
+    result->vec128 = (vec_float4){ _x, _y, _z, _w };   /* lanes 0..3 receive x, y, z, w in that order */
+}
+
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
+{   /* Build a 4-D vector from a 3-D vector plus a separate w value. */
+    result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );   /* _w sits in lane 0 of the second operand; the shuffle constant (defined elsewhere) presumably picks x,y,z then that lane -- confirm */
+}
+
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
+{   /* Build a 4-D vector from a 3-D vector with w = 0. */
+    result->vec128 = spu_sel( vec->vec128, spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );   /* mask covers bytes 12..15, so only the fourth word is taken from the 0.0f splat */
+}
+
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
+{   /* Build a 4-D vector from a point with w = 1. */
+    result->vec128 = spu_sel( pnt->vec128, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );   /* mask covers bytes 12..15, so only the fourth word is taken from the 1.0f splat */
+}
+
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
+{   /* Copy a quaternion's four components into a 4-D vector. */
+    result->vec128 = quat->vec128;   /* quadword is copied unchanged */
+}
+
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
+{   /* Set all four components to the same scalar. */
+    result->vec128 = spu_splats( scalar );   /* value is replicated into every lane */
+}
+
+static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
+{   /* Wrap a raw quadword as a 4-D vector. */
+    result->vec128 = vf4;   /* stored as given */
+}
+
+static inline void vmathV4MakeXAxis( VmathVector4 *result )
+{   /* Set result to the constant _VECTORMATH_UNIT_1000. */
+    result->vec128 = _VECTORMATH_UNIT_1000;   /* constant is defined outside this chunk; presumably (1,0,0,0) -- confirm */
+}
+
+static inline void vmathV4MakeYAxis( VmathVector4 *result )
+{   /* Set result to the constant _VECTORMATH_UNIT_0100. */
+    result->vec128 = _VECTORMATH_UNIT_0100;   /* constant is defined outside this chunk; presumably (0,1,0,0) -- confirm */
+}
+
+static inline void vmathV4MakeZAxis( VmathVector4 *result )
+{   /* Set result to the constant _VECTORMATH_UNIT_0010. */
+    result->vec128 = _VECTORMATH_UNIT_0010;   /* constant is defined outside this chunk; presumably (0,0,1,0) -- confirm */
+}
+
+static inline void vmathV4MakeWAxis( VmathVector4 *result )
+{   /* Set result to the constant _VECTORMATH_UNIT_0001. */
+    result->vec128 = _VECTORMATH_UNIT_0001;   /* constant is defined outside this chunk; presumably (0,0,0,1) -- confirm */
+}
+
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Linear interpolation: result = vec0 + (vec1 - vec0) * t. */
+    VmathVector4 delta, step;
+    vmathV4Sub( &delta, vec1, vec0 );        /* delta = vec1 - vec0 */
+    vmathV4ScalarMul( &step, &delta, t );    /* step = delta * t */
+    vmathV4Add( result, vec0, &step );       /* result is written only here, after both inputs were read */
+}
+
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
+{   /* Spherical interpolation: result = unitVec0*scale0 + unitVec1*scale1; inputs are not normalized here. */
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);   /* byte pattern picking word 0 for every lane */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);   /* word 1 for every lane */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);   /* word 2 for every lane */
+    cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );   /* four-component dot product (helper defined elsewhere) */
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );   /* broadcast lane 0 of the dot product */
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );   /* all-ones where tolerance > cosAngle */
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );   /* lane 1 <- 1-t */
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );   /* lane 2 <- t, giving { 1, 1-t, t, 1 } */
+    angles = spu_mul( angles, angle );   /* { a, (1-t)a, t*a, a } */
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );   /* every sine divided by sin(a) from lane 0 */
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );   /* sin((1-t)a)/sin(a) where mask set, else plain 1-t */
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );   /* sin(t*a)/sin(a) where mask set, else plain t */
+    result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
+}
+
+static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
+{   /* Return the vector's underlying quadword. */
+    return vec->vec128;   /* returned as stored */
+}
+
+static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
+{   /* Convert four 4-D vectors through _vmath2VfToHalfFloats, two input vectors per output quadword. */
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);   /* vec0 and vec1 */
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);   /* vec2 and vec3 */
+}
+
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
+{   /* Replace x, y, z with those of vec and keep result's current w. */
+    result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );   /* mask covers bytes 12..15, so the fourth word stays from result */
+}
+
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
+{   /* Copy a 4-D vector's components into a 3-D vector. */
+    result->vec128 = vec->vec128;   /* the w lane is copied along as well; it is not cleared */
+}
+
+static inline void vmathV4SetX( VmathVector4 *result, float _x )
+{   /* Replace the x component. */
+    result->vec128 = spu_insert( _x, result->vec128, 0 );   /* element 0 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathV4GetX( const VmathVector4 *vec )
+{   /* Return the x component. */
+    return spu_extract( vec->vec128, 0 );   /* element 0 */
+}
+
+static inline void vmathV4SetY( VmathVector4 *result, float _y )
+{   /* Replace the y component. */
+    result->vec128 = spu_insert( _y, result->vec128, 1 );   /* element 1 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathV4GetY( const VmathVector4 *vec )
+{   /* Return the y component. */
+    return spu_extract( vec->vec128, 1 );   /* element 1 */
+}
+
+static inline void vmathV4SetZ( VmathVector4 *result, float _z )
+{   /* Replace the z component. */
+    result->vec128 = spu_insert( _z, result->vec128, 2 );   /* element 2 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathV4GetZ( const VmathVector4 *vec )
+{   /* Return the z component. */
+    return spu_extract( vec->vec128, 2 );   /* element 2 */
+}
+
+static inline void vmathV4SetW( VmathVector4 *result, float _w )
+{   /* Replace the w component. */
+    result->vec128 = spu_insert( _w, result->vec128, 3 );   /* element 3 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathV4GetW( const VmathVector4 *vec )
+{   /* Return the w component. */
+    return spu_extract( vec->vec128, 3 );   /* element 3 */
+}
+
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
+{   /* Replace the component selected by idx. */
+    result->vec128 = spu_insert( value, result->vec128, idx );   /* idx is handed to spu_insert as-is; no range check is made here */
+}
+
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
+{   /* Return the component selected by idx. */
+    return spu_extract( vec->vec128, idx );   /* idx is handed to spu_extract as-is; no range check is made here */
+}
+
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* result = vec0 + vec1. */
+    result->vec128 = spu_add( vec0->vec128, vec1->vec128 );   /* lane-wise add */
+}
+
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* result = vec0 - vec1. */
+    result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );   /* lane-wise subtract */
+}
+
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{   /* result = vec * scalar. */
+    result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );   /* scalar is replicated to every lane, then multiplied lane-wise */
+}
+
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{   /* result = vec / scalar. */
+    result->vec128 = divf4( vec->vec128, spu_splats(scalar) );   /* divf4 against the replicated scalar; a zero scalar is not checked here */
+}
+
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
+{   /* result = -vec. */
+    result->vec128 = negatef4( vec->vec128 );   /* negatef4 is applied to the whole quadword */
+}
+
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Component-wise product of vec0 and vec1. */
+    result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );   /* lane-wise multiply */
+}
+
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Component-wise quotient vec0 / vec1. */
+    result->vec128 = divf4( vec0->vec128, vec1->vec128 );   /* computed by divf4; zero divisors are not checked here */
+}
+
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Component-wise reciprocal of vec. */
+    result->vec128 = recipf4( vec->vec128 );   /* computed by recipf4 */
+}
+
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Component-wise square root of vec. */
+    result->vec128 = sqrtf4( vec->vec128 );   /* computed by sqrtf4 */
+}
+
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Component-wise reciprocal square root of vec. */
+    result->vec128 = rsqrtf4( vec->vec128 );   /* computed by rsqrtf4 */
+}
+
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Component-wise absolute value of vec. */
+    result->vec128 = fabsf4( vec->vec128 );   /* computed by fabsf4 */
+}
+
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Component-wise copysign of vec0 and vec1. */
+    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );   /* presumably magnitude from vec0 and sign from vec1, as with C copysign -- confirm against copysignf4 */
+}
+
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Component-wise maximum of vec0 and vec1. */
+    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );   /* computed by fmaxf4 */
+}
+
+static inline float vmathV4MaxElem( const VmathVector4 *vec )
+{   /* Return the largest of the x, y, z and w components. */
+    const vec_float4 yInLane0 = spu_promote( spu_extract( vec->vec128, 1 ), 0 );
+    const vec_float4 zInLane0 = spu_promote( spu_extract( vec->vec128, 2 ), 0 );
+    const vec_float4 wInLane0 = spu_promote( spu_extract( vec->vec128, 3 ), 0 );
+    const vec_float4 best = fmaxf4( wInLane0, fmaxf4( zInLane0, fmaxf4( yInLane0, vec->vec128 ) ) );   /* only lane 0 is meaningful */
+    return spu_extract( best, 0 );
+}
+
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Component-wise minimum of vec0 and vec1. */
+    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );   /* computed by fminf4 */
+}
+
+static inline float vmathV4MinElem( const VmathVector4 *vec )
+{   /* Return the smallest of the x, y, z and w components. */
+    const vec_float4 yInLane0 = spu_promote( spu_extract( vec->vec128, 1 ), 0 );
+    const vec_float4 zInLane0 = spu_promote( spu_extract( vec->vec128, 2 ), 0 );
+    const vec_float4 wInLane0 = spu_promote( spu_extract( vec->vec128, 3 ), 0 );
+    const vec_float4 least = fminf4( wInLane0, fminf4( zInLane0, fminf4( yInLane0, vec->vec128 ) ) );   /* only lane 0 is meaningful */
+    return spu_extract( least, 0 );
+}
+
+static inline float vmathV4Sum( const VmathVector4 *vec )
+{   /* Add up the x, y, z and w components. */
+    const float x = spu_extract( vec->vec128, 0 );
+    const float y = spu_extract( vec->vec128, 1 );
+    const float z = spu_extract( vec->vec128, 2 );
+    const float w = spu_extract( vec->vec128, 3 );
+    return x + y + z + w;   /* evaluated left to right: ((x + y) + z) + w */
+}
+
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Four-component dot product of vec0 and vec1 as a scalar. */
+    return spu_extract( _vmathVfDot4( vec0->vec128, vec1->vec128 ), 0 );   /* helper is defined elsewhere; lane 0 of its result is returned */
+}
+
+static inline float vmathV4LengthSqr( const VmathVector4 *vec )
+{   /* Squared length: the four-component dot product of vec with itself. */
+    return spu_extract( _vmathVfDot4( vec->vec128, vec->vec128 ), 0 );   /* lane 0 of the helper's result is returned */
+}
+
+static inline float vmathV4Length( const VmathVector4 *vec )
+{   /* Length of vec. */
+    return sqrtf( vmathV4LengthSqr( vec ) );   /* scalar sqrtf of the squared length */
+}
+
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Scale vec by the reciprocal square root of its squared length; a zero-length input is not special-cased. */
+    const vec_float4 lenSqr = _vmathVfDot4( vec->vec128, vec->vec128 );   /* NOTE(review): used without a broadcast, so this relies on the helper filling every lane -- confirm */
+    result->vec128 = spu_mul( vec->vec128, rsqrtf4( lenSqr ) );
+}
+
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
+{   /* Choose vec1 when select1 is non-zero, otherwise vec0. */
+    result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );   /* -(select1 > 0) is all-ones or zero, so the choice is a bit-select, not a branch */
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print( const VmathVector4 *vec )
+{   /* Debug helper: print x, y, z and w on one line. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = vec->vec128;   /* lanes are read back through the float-array member */
+    printf( "( %f %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2], view.lane[3] );
+}
+
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
+{   /* Debug helper: print name followed by x, y, z and w on one line. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = vec->vec128;   /* lanes are read back through the float-array member */
+    printf( "%s: ( %f %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2], view.lane[3] );
+}
+
+#endif
+
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Copy pnt into result. */
+    result->vec128 = pnt->vec128;   /* the whole quadword is copied, fourth lane included */
+}
+
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
+{   /* Build a point from three scalar coordinates. */
+    result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };   /* fourth lane is initialised to 0.0f */
+}
+
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
+{   /* Turn a vector into a point with the same coordinates. */
+    result->vec128 = vec->vec128;   /* quadword is copied unchanged */
+}
+
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
+{   /* Set every coordinate to the same scalar. */
+    result->vec128 = spu_splats( scalar );   /* the value is replicated into all four lanes of the quadword */
+}
+
+static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
+{   /* Wrap a raw quadword as a point. */
+    result->vec128 = vf4;   /* stored as given; no lane is modified */
+}
+
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Linear interpolation between points: result = pnt0 + (pnt1 - pnt0) * t. */
+    VmathVector3 delta, step;
+    vmathP3Sub( &delta, pnt1, pnt0 );        /* delta = pnt1 - pnt0, a vector */
+    vmathV3ScalarMul( &step, &delta, t );    /* step = delta * t */
+    vmathP3AddV3( result, pnt0, &step );     /* result is written only here, after both inputs were read */
+}
+
+static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
+{   /* Return the point's underlying quadword. */
+    return pnt->vec128;   /* all four lanes are returned as stored */
+}
+
+static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
+{   /* Write x, y, z into *quad while keeping the fourth word already stored there. */
+    const vec_uint4 keepW = (vec_uint4)spu_maskb(0x000f);   /* bytes 12..15 set: word 3 comes from the old value */
+    const vec_float4 previous = *quad;
+    const vec_float4 merged = spu_sel( pnt->vec128, previous, keepW );
+    *quad = merged;
+}
+
+static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
+{   /* Unpack four points from three consecutive quadwords (12 floats). */
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;   /* local names describe the expected lane contents */
+    xyzx = threeQuads[0];   /* per the name: x0 y0 z0 x1 */
+    yzxy = threeQuads[1];   /* y1 z1 x2 y2 */
+    zxyz = threeQuads[2];   /* z2 x3 y3 z3 */
+    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );   /* shuffle constant defined elsewhere; presumably yields x1 y1 z1 x2 -- confirm */
+    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );   /* presumably yields x2 y2 z2 x3 -- confirm */
+    xyz3 = spu_rlqwbyte( zxyz, 4 );   /* rotate left by 4 bytes (one word): x3 y3 z3 z2 */
+    pnt0->vec128 = xyzx;   /* fourth lane of each output holds a neighbouring float; it is not cleared */
+    pnt1->vec128 = xyz1;
+    pnt2->vec128 = xyz2;
+    pnt3->vec128 = xyz3;
+}
+
+static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
+{   /* Pack four points into three consecutive quadwords (12 floats). */
+    vec_float4 xyzx, yzxy, zxyz;   /* local names describe the intended lane contents of each output */
+    xyzx = spu_shuffle( pnt0->vec128, pnt1->vec128, _VECTORMATH_SHUF_XYZA );   /* shuffle constants defined elsewhere; presumably x0 y0 z0 x1 -- confirm */
+    yzxy = spu_shuffle( pnt1->vec128, pnt2->vec128, _VECTORMATH_SHUF_YZAB );   /* presumably y1 z1 x2 y2 */
+    zxyz = spu_shuffle( pnt2->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZABC );   /* presumably z2 x3 y3 z3 */
+    threeQuads[0] = xyzx;   /* outputs are written only after all inputs were read */
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
+{   /* Pack eight points as xyz triples, then convert the six float quadwords pairwise via _vmath2VfToHalfFloats. */
+    vec_float4 firstFour[3];    /* packed xyz of pnt0..pnt3 */
+    vec_float4 lastFour[3];     /* packed xyz of pnt4..pnt7 */
+    vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, firstFour );
+    vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, lastFour );
+    threeQuads[0] = _vmath2VfToHalfFloats(firstFour[0], firstFour[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(firstFour[2], lastFour[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(lastFour[1], lastFour[2]);
+}
+
+static inline void vmathP3SetX( VmathPoint3 *result, float _x )
+{   /* Replace the x coordinate. */
+    result->vec128 = spu_insert( _x, result->vec128, 0 );   /* element 0 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathP3GetX( const VmathPoint3 *pnt )
+{   /* Return the x coordinate. */
+    return spu_extract( pnt->vec128, 0 );   /* element 0 */
+}
+
+static inline void vmathP3SetY( VmathPoint3 *result, float _y )
+{   /* Replace the y coordinate. */
+    result->vec128 = spu_insert( _y, result->vec128, 1 );   /* element 1 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathP3GetY( const VmathPoint3 *pnt )
+{   /* Return the y coordinate. */
+    return spu_extract( pnt->vec128, 1 );   /* element 1 */
+}
+
+static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
+{   /* Replace the z coordinate. */
+    result->vec128 = spu_insert( _z, result->vec128, 2 );   /* element 2 is overwritten; other lanes come from the old value */
+}
+
+static inline float vmathP3GetZ( const VmathPoint3 *pnt )
+{   /* Return the z coordinate. */
+    return spu_extract( pnt->vec128, 2 );   /* element 2 */
+}
+
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
+{   /* Replace the coordinate selected by idx. */
+    result->vec128 = spu_insert( value, result->vec128, idx );   /* idx is handed to spu_insert as-is; no range check is made here */
+}
+
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
+{   /* Return the coordinate selected by idx. */
+    return spu_extract( pnt->vec128, idx );   /* idx is handed to spu_extract as-is; no range check is made here */
+}
+
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Difference of two points, stored as a vector: result = pnt0 - pnt1. */
+    result->vec128 = spu_sub( pnt0->vec128, pnt1->vec128 );   /* lane-wise subtract over the whole quadword */
+}
+
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{   /* Offset a point by a vector: result = pnt + vec1. */
+    result->vec128 = spu_add( pnt->vec128, vec1->vec128 );   /* lane-wise add over the whole quadword */
+}
+
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{   /* Offset a point by the negative of a vector: result = pnt - vec1. */
+    result->vec128 = spu_sub( pnt->vec128, vec1->vec128 );   /* lane-wise subtract over the whole quadword */
+}
+
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Coordinate-wise product of pnt0 and pnt1. */
+    result->vec128 = spu_mul( pnt0->vec128, pnt1->vec128 );   /* lane-wise multiply over the whole quadword */
+}
+
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Coordinate-wise quotient pnt0 / pnt1. */
+    result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );   /* divf4 runs on all four lanes, so the fourth lanes are divided too */
+}
+
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Coordinate-wise reciprocal of pnt. */
+    result->vec128 = recipf4( pnt->vec128 );   /* computed by recipf4 on the whole quadword */
+}
+
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Coordinate-wise square root of pnt. */
+    result->vec128 = sqrtf4( pnt->vec128 );   /* computed by sqrtf4 on the whole quadword */
+}
+
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Coordinate-wise reciprocal square root of pnt. */
+    result->vec128 = rsqrtf4( pnt->vec128 );   /* computed by rsqrtf4 on the whole quadword */
+}
+
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Coordinate-wise absolute value of pnt. */
+    result->vec128 = fabsf4( pnt->vec128 );   /* computed by fabsf4 on the whole quadword */
+}
+
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Coordinate-wise copysign of pnt0 and pnt1. */
+    result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );   /* presumably magnitude from pnt0 and sign from pnt1, as with C copysign -- confirm against copysignf4 */
+}
+
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Coordinate-wise maximum of pnt0 and pnt1. */
+    result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );   /* computed by fmaxf4 on the whole quadword */
+}
+
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
+{   /* Return the largest of the x, y and z coordinates. */
+    const vec_float4 yInLane0 = spu_promote( spu_extract( pnt->vec128, 1 ), 0 );
+    const vec_float4 zInLane0 = spu_promote( spu_extract( pnt->vec128, 2 ), 0 );
+    const vec_float4 best = fmaxf4( zInLane0, fmaxf4( yInLane0, pnt->vec128 ) );   /* only lane 0 is meaningful: max(z, max(y, x)) */
+    return spu_extract( best, 0 );
+}
+
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Coordinate-wise minimum of pnt0 and pnt1. */
+    result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );   /* computed by fminf4 on the whole quadword */
+}
+
+static inline float vmathP3MinElem( const VmathPoint3 *pnt )
+{   /* Return the smallest of the x, y and z coordinates. */
+    const vec_float4 yInLane0 = spu_promote( spu_extract( pnt->vec128, 1 ), 0 );
+    const vec_float4 zInLane0 = spu_promote( spu_extract( pnt->vec128, 2 ), 0 );
+    const vec_float4 least = fminf4( zInLane0, fminf4( yInLane0, pnt->vec128 ) );   /* only lane 0 is meaningful: min(z, min(y, x)) */
+    return spu_extract( least, 0 );
+}
+
+static inline float vmathP3Sum( const VmathPoint3 *pnt )
+{   /* Add up the x, y and z coordinates. */
+    const float x = spu_extract( pnt->vec128, 0 );
+    const float y = spu_extract( pnt->vec128, 1 );
+    const float z = spu_extract( pnt->vec128, 2 );
+    return x + y + z;   /* evaluated left to right: (x + y) + z */
+}
+
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
+{   /* Multiply every coordinate of pnt by scaleVal. */
+    VmathPoint3 factor;
+    vmathP3MakeFromScalar( &factor, scaleVal );   /* scaleVal in every lane */
+    vmathP3MulPerElem( result, pnt, &factor );
+}
+
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
+{   /* Multiply each coordinate of pnt by the matching component of scaleVec. */
+    VmathPoint3 factors;
+    vmathP3MakeFromV3( &factors, scaleVec );   /* same numbers, retyped as a point for the per-element multiply */
+    vmathP3MulPerElem( result, pnt, &factors );
+}
+
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
+{   /* Dot product of the point's coordinates with unitVec; unitVec is not normalized here. */
+    return spu_extract( _vmathVfDot3( pnt->vec128, unitVec->vec128 ), 0 );   /* lane 0 of the helper's result is returned */
+}
+
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
+{   /* Squared distance of pnt from the origin. */
+    VmathVector3 fromOrigin;
+    vmathV3MakeFromP3( &fromOrigin, pnt );   /* the point's coordinates viewed as a vector */
+    return vmathV3LengthSqr( &fromOrigin );
+}
+
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
+{   /* Distance of pnt from the origin. */
+    VmathVector3 fromOrigin;
+    vmathV3MakeFromP3( &fromOrigin, pnt );   /* the point's coordinates viewed as a vector */
+    return vmathV3Length( &fromOrigin );
+}
+
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Squared distance between two points. */
+    VmathVector3 delta;
+    vmathP3Sub( &delta, pnt1, pnt0 );   /* delta = pnt1 - pnt0 */
+    return vmathV3LengthSqr( &delta );
+}
+
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Distance between two points. */
+    VmathVector3 delta;
+    vmathP3Sub( &delta, pnt1, pnt0 );   /* delta = pnt1 - pnt0 */
+    return vmathV3Length( &delta );
+}
+
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
+{   /* Choose pnt1 when select1 is non-zero, otherwise pnt0. */
+    result->vec128 = spu_sel( pnt0->vec128, pnt1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );   /* -(select1 > 0) is all-ones or zero, so the choice is a bit-select, not a branch */
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print( const VmathPoint3 *pnt )
+{   /* Debug helper: print x, y and z on one line. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = pnt->vec128;   /* lanes are read back through the float-array member */
+    printf( "( %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2] );
+}
+
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
+{   /* Debug helper: print name followed by x, y and z on one line. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = pnt->vec128;   /* lanes are read back through the float-array member */
+    printf( "%s: ( %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2] );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos_v.h
index 7b41134e6..7cf0fa9cc 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos_v.h
@@ -1,951 +1,951 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_V_C_H
-#define _VECTORMATH_VEC_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathVector3 result;
-    vmathV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector3 result;
-    vmathV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
-{
-    VmathVector3 result;
-    vmathV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathVector3 result;
-    vmathV3MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeXAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeYAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeZAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathVector3 result;
-    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline vec_float4 vmathV3Get128_V( VmathVector3 vec )
-{
-    return vmathV3Get128(&vec);
-}
-
-static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad )
-{
-    vmathV3StoreXYZ(&vec, quad);
-}
-
-static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
-{
-    vmathV3LoadXYZArray(vec0, vec1, vec2, vec3, threeQuads);
-}
-
-static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads )
-{
-    vmathV3StoreXYZArray(&vec0, &vec1, &vec2, &vec3, threeQuads);
-}
-
-static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads )
-{
-    vmathV3StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, &vec4, &vec5, &vec6, &vec7, threeQuads);
-}
-
-static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
-{
-    vmathV3SetX(result, _x);
-}
-
-static inline float vmathV3GetX_V( VmathVector3 vec )
-{
-    return vmathV3GetX(&vec);
-}
-
-static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
-{
-    vmathV3SetY(result, _y);
-}
-
-static inline float vmathV3GetY_V( VmathVector3 vec )
-{
-    return vmathV3GetY(&vec);
-}
-
-static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
-{
-    vmathV3SetZ(result, _z);
-}
-
-static inline float vmathV3GetZ_V( VmathVector3 vec )
-{
-    return vmathV3GetZ(&vec);
-}
-
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
-{
-    vmathV3SetElem(result, idx, value);
-}
-
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
-{
-    return vmathV3GetElem(&vec, idx);
-}
-
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MaxElem_V( VmathVector3 vec )
-{
-    return vmathV3MaxElem(&vec);
-}
-
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MinElem_V( VmathVector3 vec )
-{
-    return vmathV3MinElem(&vec);
-}
-
-static inline float vmathV3Sum_V( VmathVector3 vec )
-{
-    return vmathV3Sum(&vec);
-}
-
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    return vmathV3Dot(&vec0, &vec1);
-}
-
-static inline float vmathV3LengthSqr_V( VmathVector3 vec )
-{
-    return vmathV3LengthSqr(&vec);
-}
-
-static inline float vmathV3Length_V( VmathVector3 vec )
-{
-    return vmathV3Length(&vec);
-}
-
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
-{
-    VmathVector3 result;
-    vmathV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print_V( VmathVector3 vec )
-{
-    vmathV3Print(&vec);
-}
-
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
-{
-    vmathV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
-{
-    VmathVector4 result;
-    vmathV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
-{
-    VmathVector4 result;
-    vmathV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathVector4 result;
-    vmathV4MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeXAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeYAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeZAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeWAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
-{
-    VmathVector4 result;
-    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline vec_float4 vmathV4Get128_V( VmathVector4 vec )
-{
-    return vmathV4Get128(&vec);
-}
-
-static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads )
-{
-    vmathV4StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, twoQuads);
-}
-
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
-{
-    vmathV4SetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
-{
-    VmathVector3 result;
-    vmathV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
-{
-    vmathV4SetX(result, _x);
-}
-
-static inline float vmathV4GetX_V( VmathVector4 vec )
-{
-    return vmathV4GetX(&vec);
-}
-
-static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
-{
-    vmathV4SetY(result, _y);
-}
-
-static inline float vmathV4GetY_V( VmathVector4 vec )
-{
-    return vmathV4GetY(&vec);
-}
-
-static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
-{
-    vmathV4SetZ(result, _z);
-}
-
-static inline float vmathV4GetZ_V( VmathVector4 vec )
-{
-    return vmathV4GetZ(&vec);
-}
-
-static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
-{
-    vmathV4SetW(result, _w);
-}
-
-static inline float vmathV4GetW_V( VmathVector4 vec )
-{
-    return vmathV4GetW(&vec);
-}
-
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
-{
-    vmathV4SetElem(result, idx, value);
-}
-
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
-{
-    return vmathV4GetElem(&vec, idx);
-}
-
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MaxElem_V( VmathVector4 vec )
-{
-    return vmathV4MaxElem(&vec);
-}
-
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MinElem_V( VmathVector4 vec )
-{
-    return vmathV4MinElem(&vec);
-}
-
-static inline float vmathV4Sum_V( VmathVector4 vec )
-{
-    return vmathV4Sum(&vec);
-}
-
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    return vmathV4Dot(&vec0, &vec1);
-}
-
-static inline float vmathV4LengthSqr_V( VmathVector4 vec )
-{
-    return vmathV4LengthSqr(&vec);
-}
-
-static inline float vmathV4Length_V( VmathVector4 vec )
-{
-    return vmathV4Length(&vec);
-}
-
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
-{
-    VmathVector4 result;
-    vmathV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print_V( VmathVector4 vec )
-{
-    vmathV4Print(&vec);
-}
-
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
-{
-    vmathV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathPoint3 result;
-    vmathP3MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt )
-{
-    return vmathP3Get128(&pnt);
-}
-
-static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad )
-{
-    vmathP3StoreXYZ(&pnt, quad);
-}
-
-static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
-{
-    vmathP3LoadXYZArray(pnt0, pnt1, pnt2, pnt3, threeQuads);
-}
-
-static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads )
-{
-    vmathP3StoreXYZArray(&pnt0, &pnt1, &pnt2, &pnt3, threeQuads);
-}
-
-static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads )
-{
-    vmathP3StoreHalfFloats(&pnt0, &pnt1, &pnt2, &pnt3, &pnt4, &pnt5, &pnt6, &pnt7, threeQuads);
-}
-
-static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
-{
-    vmathP3SetX(result, _x);
-}
-
-static inline float vmathP3GetX_V( VmathPoint3 pnt )
-{
-    return vmathP3GetX(&pnt);
-}
-
-static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
-{
-    vmathP3SetY(result, _y);
-}
-
-static inline float vmathP3GetY_V( VmathPoint3 pnt )
-{
-    return vmathP3GetY(&pnt);
-}
-
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
-{
-    vmathP3SetZ(result, _z);
-}
-
-static inline float vmathP3GetZ_V( VmathPoint3 pnt )
-{
-    return vmathP3GetZ(&pnt);
-}
-
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
-{
-    vmathP3SetElem(result, idx, value);
-}
-
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
-{
-    return vmathP3GetElem(&pnt, idx);
-}
-
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathVector3 result;
-    vmathP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MaxElem(&pnt);
-}
-
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MinElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MinElem(&pnt);
-}
-
-static inline float vmathP3Sum_V( VmathPoint3 pnt )
-{
-    return vmathP3Sum(&pnt);
-}
-
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
-{
-    VmathPoint3 result;
-    vmathP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
-{
-    VmathPoint3 result;
-    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
-{
-    return vmathP3Projection(&pnt, &unitVec);
-}
-
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistSqrFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
-{
-    VmathPoint3 result;
-    vmathP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print_V( VmathPoint3 pnt )
-{
-    vmathP3Print(&pnt);
-}
-
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
-{
-    vmathP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_V_C_H
+#define _VECTORMATH_VEC_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * Shuffle-control words: [x,y,z,w] label the words of the first operand, [a,b,c,d] those of the second.
+ */
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
+#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+/* _VECTORMATH_SHUF_XYZA is defined once, at the head of the composite patterns above. */
+#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
+{   /* By-value form of vmathV3MakeFromElems: fills a local vector from the three scalars and returns it. */
+    VmathVector3 made;
+    vmathV3MakeFromElems( &made, _x, _y, _z );
+    return made;
+}
+
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
+{   /* By-value form of vmathV3MakeFromP3: converts the local copy of pnt and returns the resulting vector. */
+    VmathVector3 made;
+    vmathV3MakeFromP3( &made, &pnt );
+    return made;
+}
+
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
+{   /* By-value form of vmathV3MakeFromScalar: builds a local vector from scalar and returns it. */
+    VmathVector3 made;
+    vmathV3MakeFromScalar( &made, scalar );
+    return made;
+}
+
+static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 )
+{   /* By-value form of vmathV3MakeFrom128: wraps the 128-bit value vf4 in a local vector and returns it. */
+    VmathVector3 made;
+    vmathV3MakeFrom128( &made, vf4 );
+    return made;
+}
+
+static inline VmathVector3 vmathV3MakeXAxis_V( void )
+{   /* By-value form of vmathV3MakeXAxis; the (void) list gives this definition a real prototype in C. */
+    VmathVector3 result;
+    vmathV3MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeYAxis_V( void )
+{   /* By-value form of vmathV3MakeYAxis; the (void) list gives this definition a real prototype in C. */
+    VmathVector3 result;
+    vmathV3MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeZAxis_V( void )
+{   /* By-value form of vmathV3MakeZAxis; the (void) list gives this definition a real prototype in C. */
+    VmathVector3 result;
+    vmathV3MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
+{   /* By-value form of vmathV3Lerp: passes t and the addresses of both local copies, returns the blended vector. */
+    VmathVector3 blended;
+    vmathV3Lerp( &blended, t, &vec0, &vec1 );
+    return blended;
+}
+
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{   /* By-value form of vmathV3Slerp: passes t and the addresses of both local copies, returns the result vector. */
+    VmathVector3 blended;
+    vmathV3Slerp( &blended, t, &unitVec0, &unitVec1 );
+    return blended;
+}
+
+static inline vec_float4 vmathV3Get128_V( VmathVector3 vec )
+{   /* By-value form of vmathV3Get128: returns whatever it yields for the local copy of vec. */
+    return vmathV3Get128(&vec);
+}
+
+static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad )
+{   /* By-value form of vmathV3StoreXYZ: vec is taken by value, quad is forwarded as the destination pointer. */
+    vmathV3StoreXYZ(&vec, quad);
+}
+
+static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
+{   /* Pure pass-through to vmathV3LoadXYZArray: the four outputs stay pointers, so every argument is forwarded unchanged. */
+    vmathV3LoadXYZArray(vec0, vec1, vec2, vec3, threeQuads);
+}
+
+static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads )
+{
+    vmathV3StoreXYZArray(&vec0, &vec1, &vec2, &vec3, threeQuads);
+}
+
+static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads )
+{
+    vmathV3StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, &vec4, &vec5, &vec6, &vec7, threeQuads);
+}
+
+static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
+{
+    vmathV3SetX(result, _x);
+}
+
+static inline float vmathV3GetX_V( VmathVector3 vec )
+{
+    return vmathV3GetX(&vec);
+}
+
+static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
+{
+    vmathV3SetY(result, _y);
+}
+
+static inline float vmathV3GetY_V( VmathVector3 vec )
+{
+    return vmathV3GetY(&vec);
+}
+
+static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
+{
+    vmathV3SetZ(result, _z);
+}
+
+static inline float vmathV3GetZ_V( VmathVector3 vec )
+{
+    return vmathV3GetZ(&vec);
+}
+
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
+{
+    vmathV3SetElem(result, idx, value);
+}
+
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
+{
+    return vmathV3GetElem(&vec, idx);
+}
+
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MaxElem_V( VmathVector3 vec )
+{
+    return vmathV3MaxElem(&vec);
+}
+
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MinElem_V( VmathVector3 vec )
+{
+    return vmathV3MinElem(&vec);
+}
+
+static inline float vmathV3Sum_V( VmathVector3 vec )
+{
+    return vmathV3Sum(&vec);
+}
+
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    return vmathV3Dot(&vec0, &vec1);
+}
+
+static inline float vmathV3LengthSqr_V( VmathVector3 vec )
+{
+    return vmathV3LengthSqr(&vec);
+}
+
+static inline float vmathV3Length_V( VmathVector3 vec )
+{
+    return vmathV3Length(&vec);
+}
+
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
+{
+    VmathVector3 result;
+    vmathV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print_V( VmathVector3 vec )
+{
+    vmathV3Print(&vec);
+}
+
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
+{
+    vmathV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
+{
+    VmathVector4 result;
+    vmathV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
+{
+    VmathVector4 result;
+    vmathV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 )
+{
+    VmathVector4 result;
+    vmathV4MakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeXAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeYAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeZAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeWAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeWAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
+{
+    VmathVector4 result;
+    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline vec_float4 vmathV4Get128_V( VmathVector4 vec )
+{
+    return vmathV4Get128(&vec);
+}
+
+static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads )
+{
+    vmathV4StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, twoQuads);
+}
+
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
+{
+    vmathV4SetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
+{
+    VmathVector3 result;
+    vmathV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
+{
+    vmathV4SetX(result, _x);
+}
+
+static inline float vmathV4GetX_V( VmathVector4 vec )
+{
+    return vmathV4GetX(&vec);
+}
+
+static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
+{
+    vmathV4SetY(result, _y);
+}
+
+static inline float vmathV4GetY_V( VmathVector4 vec )
+{
+    return vmathV4GetY(&vec);
+}
+
+static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
+{
+    vmathV4SetZ(result, _z);
+}
+
+static inline float vmathV4GetZ_V( VmathVector4 vec )
+{
+    return vmathV4GetZ(&vec);
+}
+
+static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
+{
+    vmathV4SetW(result, _w);
+}
+
+static inline float vmathV4GetW_V( VmathVector4 vec )
+{
+    return vmathV4GetW(&vec);
+}
+
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
+{
+    vmathV4SetElem(result, idx, value);
+}
+
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
+{
+    return vmathV4GetElem(&vec, idx);
+}
+
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MaxElem_V( VmathVector4 vec )
+{
+    return vmathV4MaxElem(&vec);
+}
+
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MinElem_V( VmathVector4 vec )
+{
+    return vmathV4MinElem(&vec);
+}
+
+static inline float vmathV4Sum_V( VmathVector4 vec )
+{
+    return vmathV4Sum(&vec);
+}
+
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    return vmathV4Dot(&vec0, &vec1);
+}
+
+static inline float vmathV4LengthSqr_V( VmathVector4 vec )
+{
+    return vmathV4LengthSqr(&vec);
+}
+
+static inline float vmathV4Length_V( VmathVector4 vec )
+{
+    return vmathV4Length(&vec);
+}
+
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
+{
+    VmathVector4 result;
+    vmathV4Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print_V( VmathVector4 vec )
+{
+    vmathV4Print(&vec);
+}
+
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
+{
+    vmathV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 )
+{
+    VmathPoint3 result;
+    vmathP3MakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3Lerp(&result, t, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt )
+{
+    return vmathP3Get128(&pnt);
+}
+
+static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad )
+{
+    vmathP3StoreXYZ(&pnt, quad);
+}
+
+static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
+{
+    vmathP3LoadXYZArray(pnt0, pnt1, pnt2, pnt3, threeQuads);
+}
+
+static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads )
+{
+    vmathP3StoreXYZArray(&pnt0, &pnt1, &pnt2, &pnt3, threeQuads);
+}
+
+static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads )
+{
+    vmathP3StoreHalfFloats(&pnt0, &pnt1, &pnt2, &pnt3, &pnt4, &pnt5, &pnt6, &pnt7, threeQuads);
+}
+
+static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
+{
+    vmathP3SetX(result, _x);
+}
+
+static inline float vmathP3GetX_V( VmathPoint3 pnt )
+{
+    return vmathP3GetX(&pnt);
+}
+
+static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
+{
+    vmathP3SetY(result, _y);
+}
+
+static inline float vmathP3GetY_V( VmathPoint3 pnt )
+{
+    return vmathP3GetY(&pnt);
+}
+
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
+{
+    vmathP3SetZ(result, _z);
+}
+
+static inline float vmathP3GetZ_V( VmathPoint3 pnt )
+{
+    return vmathP3GetZ(&pnt);
+}
+
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
+{
+    vmathP3SetElem(result, idx, value);
+}
+
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
+{
+    return vmathP3GetElem(&pnt, idx);
+}
+
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathVector3 result;
+    vmathP3Sub(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3AddV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3SubV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MulPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3DivPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RecipPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3SqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RsqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3AbsPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MaxElem(&pnt);
+}
+
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MinPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MinElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MinElem(&pnt);
+}
+
+static inline float vmathP3Sum_V( VmathPoint3 pnt )
+{
+    return vmathP3Sum(&pnt);
+}
+
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
+{
+    VmathPoint3 result;
+    vmathP3Scale(&result, &pnt, scaleVal);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
+{
+    VmathPoint3 result;
+    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
+    return result;
+}
+
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
+{
+    return vmathP3Projection(&pnt, &unitVec);
+}
+
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistSqrFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
+{
+    VmathPoint3 result;
+    vmathP3Select(&result, &pnt0, &pnt1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print_V( VmathPoint3 pnt )
+{
+    vmathP3Print(&pnt);
+}
+
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
+{
+    vmathP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h
index fde4b0015..608b38562 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h
@@ -1,1237 +1,1237 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_C_H
-#define _VECTORMATH_VEC_SOA_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_float4 vec128 = vec->vec128;
-    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
-}
-
-static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
-    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-}
-
-static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathSoaV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
-    vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
-}
-
-static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
-    vmathV3MakeFrom128( result0, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_XAYB ) );
-    vmathV3MakeFrom128( result1, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_ZBW0 ) );
-    vmathV3MakeFrom128( result2, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_XCY0 ) );
-    vmathV3MakeFrom128( result3, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_ZDW0 ) );
-}
-
-static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
-    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
-    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
-    vmathSoaV3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
-    vmathSoaV3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
-    vmathSoaV3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
-}
-
-static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
-    zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
-    yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
-    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
-    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
-    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathSoaV3StoreXYZArray( vec0, xyz0 );
-    vmathSoaV3StoreXYZArray( vec1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_add( vec0->x, vec1->x );
-    result->y = spu_add( vec0->y, vec1->y );
-    result->z = spu_add( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_sub( vec0->x, vec1->x );
-    result->y = spu_sub( vec0->y, vec1->y );
-    result->z = spu_sub( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = spu_add( vec->x, pnt1->x );
-    result->y = spu_add( vec->y, pnt1->y );
-    result->z = spu_add( vec->z, pnt1->z );
-}
-
-static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
-{
-    result->x = spu_mul( vec->x, scalar );
-    result->y = spu_mul( vec->y, scalar );
-    result->z = spu_mul( vec->z, scalar );
-}
-
-static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
-{
-    result->x = divf4( vec->x, scalar );
-    result->y = divf4( vec->y, scalar );
-    result->z = divf4( vec->z, scalar );
-}
-
-static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = negatef4( vec->x );
-    result->y = negatef4( vec->y );
-    result->z = negatef4( vec->z );
-}
-
-static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_mul( vec0->x, vec1->x );
-    result->y = spu_mul( vec0->y, vec1->y );
-    result->z = spu_mul( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = divf4( vec0->x, vec1->x );
-    result->y = divf4( vec0->y, vec1->y );
-    result->z = divf4( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = recipf4( vec->x );
-    result->y = recipf4( vec->y );
-    result->z = recipf4( vec->z );
-}
-
-static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = sqrtf4( vec->x );
-    result->y = sqrtf4( vec->y );
-    result->z = sqrtf4( vec->z );
-}
-
-static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = rsqrtf4( vec->x );
-    result->y = rsqrtf4( vec->y );
-    result->z = rsqrtf4( vec->z );
-}
-
-static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = fabsf4( vec->x );
-    result->y = fabsf4( vec->y );
-    result->z = fabsf4( vec->z );
-}
-
-static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = copysignf4( vec0->x, vec1->x );
-    result->y = copysignf4( vec0->y, vec1->y );
-    result->z = copysignf4( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = fmaxf4( vec0->x, vec1->x );
-    result->y = fmaxf4( vec0->y, vec1->y );
-    result->z = fmaxf4( vec0->z, vec1->z );
-}
-
-static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec->x, vec->y );
-    result = fmaxf4( vec->z, result );
-    return result;
-}
-
-static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = fminf4( vec0->x, vec1->x );
-    result->y = fminf4( vec0->y, vec1->y );
-    result->z = fminf4( vec0->z, vec1->z );
-}
-
-static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec->x, vec->y );
-    result = fminf4( vec->z, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = spu_add( vec->x, vec->y );
-    result = spu_add( result, vec->z );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0->x, vec1->x );
-    result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
-    result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = spu_mul( vec->x, vec->x );
-    result = spu_add( result, spu_mul( vec->y, vec->y ) );
-    result = spu_add( result, spu_mul( vec->z, vec->z ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
-{
-    return sqrtf4( vmathSoaV3LengthSqr( vec ) );
-}
-
-static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaV3LengthSqr( vec );
-    lenInv = rsqrtf4( lenSqr );
-    result->x = spu_mul( vec->x, lenInv );
-    result->y = spu_mul( vec->y, lenInv );
-    result->z = spu_mul( vec->z, lenInv );
-}
-
-static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_sub( spu_mul( vec0->y, vec1->z ), spu_mul( vec0->z, vec1->y ) );
-    tmpY = spu_sub( spu_mul( vec0->z, vec1->x ), spu_mul( vec0->x, vec1->z ) );
-    tmpZ = spu_sub( spu_mul( vec0->x, vec1->y ), spu_mul( vec0->y, vec1->x ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
-{
-    result->x = spu_sel( vec0->x, vec1->x, select1 );
-    result->y = spu_sel( vec0->y, vec1->y, select1 );
-    result->z = spu_sel( vec0->z, vec1->z, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
-{
-    VmathVector3 vec0, vec1, vec2, vec3;
-    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV3Print( &vec3 );
-}
-
-static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
-{
-    VmathVector3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV3Print( &vec3 );
-}
-
-#endif
-
-static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
-{
-    vmathSoaV4SetXYZ( result, xyz );
-    vmathSoaV4SetW( result, _w );
-}
-
-static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = spu_splats(0.0f);
-}
-
-static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-    result->w = spu_splats(1.0f);
-}
-
-static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_float4 vec128 = vec->vec128;
-    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
-    result->w = spu_shuffle( vec128, vec128, shuffle_wwww );
-}
-
-static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
-    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    result->w = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-}
-
-static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    VmathSoaVector4 tmpV4_0, tmpV4_1;
-    vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathSoaV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
-{
-    VmathSoaVector4 tmpV4_0, tmpV4_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
-    vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
-    vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
-}
-
-static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_ZCWD );
-    vmathV4MakeFrom128( result0, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
-    vmathV4MakeFrom128( result1, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
-    vmathV4MakeFrom128( result2, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
-    vmathV4MakeFrom128( result3, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
-}
-
-static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
-{
-    VmathVector4 v0, v1, v2, v3;
-    vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
-    twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
-    twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
-}
-
-static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
-{
-    vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
-}
-
-static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
-{
-    result->w = _w;
-}
-
-static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
-{
-    return vec->w;
-}
-
-static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = spu_add( vec0->x, vec1->x );
-    result->y = spu_add( vec0->y, vec1->y );
-    result->z = spu_add( vec0->z, vec1->z );
-    result->w = spu_add( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = spu_sub( vec0->x, vec1->x );
-    result->y = spu_sub( vec0->y, vec1->y );
-    result->z = spu_sub( vec0->z, vec1->z );
-    result->w = spu_sub( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
-{
-    result->x = spu_mul( vec->x, scalar );
-    result->y = spu_mul( vec->y, scalar );
-    result->z = spu_mul( vec->z, scalar );
-    result->w = spu_mul( vec->w, scalar );
-}
-
-static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
-{
-    result->x = divf4( vec->x, scalar );
-    result->y = divf4( vec->y, scalar );
-    result->z = divf4( vec->z, scalar );
-    result->w = divf4( vec->w, scalar );
-}
-
-static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = negatef4( vec->x );
-    result->y = negatef4( vec->y );
-    result->z = negatef4( vec->z );
-    result->w = negatef4( vec->w );
-}
-
-static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = spu_mul( vec0->x, vec1->x );
-    result->y = spu_mul( vec0->y, vec1->y );
-    result->z = spu_mul( vec0->z, vec1->z );
-    result->w = spu_mul( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = divf4( vec0->x, vec1->x );
-    result->y = divf4( vec0->y, vec1->y );
-    result->z = divf4( vec0->z, vec1->z );
-    result->w = divf4( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = recipf4( vec->x );
-    result->y = recipf4( vec->y );
-    result->z = recipf4( vec->z );
-    result->w = recipf4( vec->w );
-}
-
-static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = sqrtf4( vec->x );
-    result->y = sqrtf4( vec->y );
-    result->z = sqrtf4( vec->z );
-    result->w = sqrtf4( vec->w );
-}
-
-static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = rsqrtf4( vec->x );
-    result->y = rsqrtf4( vec->y );
-    result->z = rsqrtf4( vec->z );
-    result->w = rsqrtf4( vec->w );
-}
-
-static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = fabsf4( vec->x );
-    result->y = fabsf4( vec->y );
-    result->z = fabsf4( vec->z );
-    result->w = fabsf4( vec->w );
-}
-
-static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = copysignf4( vec0->x, vec1->x );
-    result->y = copysignf4( vec0->y, vec1->y );
-    result->z = copysignf4( vec0->z, vec1->z );
-    result->w = copysignf4( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = fmaxf4( vec0->x, vec1->x );
-    result->y = fmaxf4( vec0->y, vec1->y );
-    result->z = fmaxf4( vec0->z, vec1->z );
-    result->w = fmaxf4( vec0->w, vec1->w );
-}
-
-static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec->x, vec->y );
-    result = fmaxf4( vec->z, result );
-    result = fmaxf4( vec->w, result );
-    return result;
-}
-
-static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = fminf4( vec0->x, vec1->x );
-    result->y = fminf4( vec0->y, vec1->y );
-    result->z = fminf4( vec0->z, vec1->z );
-    result->w = fminf4( vec0->w, vec1->w );
-}
-
-static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec->x, vec->y );
-    result = fminf4( vec->z, result );
-    result = fminf4( vec->w, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = spu_add( vec->x, vec->y );
-    result = spu_add( result, vec->z );
-    result = spu_add( result, vec->w );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0->x, vec1->x );
-    result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
-    result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
-    result = spu_add( result, spu_mul( vec0->w, vec1->w ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = spu_mul( vec->x, vec->x );
-    result = spu_add( result, spu_mul( vec->y, vec->y ) );
-    result = spu_add( result, spu_mul( vec->z, vec->z ) );
-    result = spu_add( result, spu_mul( vec->w, vec->w ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
-{
-    return sqrtf4( vmathSoaV4LengthSqr( vec ) );
-}
-
-static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaV4LengthSqr( vec );
-    lenInv = rsqrtf4( lenSqr );
-    result->x = spu_mul( vec->x, lenInv );
-    result->y = spu_mul( vec->y, lenInv );
-    result->z = spu_mul( vec->z, lenInv );
-    result->w = spu_mul( vec->w, lenInv );
-}
-
-static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
-{
-    result->x = spu_sel( vec0->x, vec1->x, select1 );
-    result->y = spu_sel( vec0->y, vec1->y, select1 );
-    result->z = spu_sel( vec0->z, vec1->z, select1 );
-    result->w = spu_sel( vec0->w, vec1->w, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
-{
-    VmathVector4 vec0, vec1, vec2, vec3;
-    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV4Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV4Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV4Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV4Print( &vec3 );
-}
-
-static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
-{
-    VmathVector4 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV4Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV4Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV4Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV4Print( &vec3 );
-}
-
-#endif
-
-static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_float4 vec128 = pnt->vec128;
-    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
-}
-
-static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZCWD );
-    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-}
-
-static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_ZCWD );
-    vmathP3MakeFrom128( result0, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_XAYB ) );
-    vmathP3MakeFrom128( result1, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_ZBW0 ) );
-    vmathP3MakeFrom128( result2, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_XCY0 ) );
-    vmathP3MakeFrom128( result3, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_ZDW0 ) );
-}
-
-static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
-    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
-    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
-    vmathSoaP3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
-    vmathSoaP3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
-    vmathSoaP3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
-}
-
-static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
-    zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
-    yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
-    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
-    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
-    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathSoaP3StoreXYZArray( pnt0, xyz0 );
-    vmathSoaP3StoreXYZArray( pnt1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
-{
-    return pnt->x;
-}
-
-static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
-{
-    return pnt->y;
-}
-
-static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
-{
-    return pnt->z;
-}
-
-static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
-{
-    return *(&pnt->x + idx);
-}
-
-static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = spu_sub( pnt0->x, pnt1->x );
-    result->y = spu_sub( pnt0->y, pnt1->y );
-    result->z = spu_sub( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_add( pnt->x, vec1->x );
-    result->y = spu_add( pnt->y, vec1->y );
-    result->z = spu_add( pnt->z, vec1->z );
-}
-
-static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_sub( pnt->x, vec1->x );
-    result->y = spu_sub( pnt->y, vec1->y );
-    result->z = spu_sub( pnt->z, vec1->z );
-}
-
-static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = spu_mul( pnt0->x, pnt1->x );
-    result->y = spu_mul( pnt0->y, pnt1->y );
-    result->z = spu_mul( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = divf4( pnt0->x, pnt1->x );
-    result->y = divf4( pnt0->y, pnt1->y );
-    result->z = divf4( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = recipf4( pnt->x );
-    result->y = recipf4( pnt->y );
-    result->z = recipf4( pnt->z );
-}
-
-static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = sqrtf4( pnt->x );
-    result->y = sqrtf4( pnt->y );
-    result->z = sqrtf4( pnt->z );
-}
-
-static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = rsqrtf4( pnt->x );
-    result->y = rsqrtf4( pnt->y );
-    result->z = rsqrtf4( pnt->z );
-}
-
-static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = fabsf4( pnt->x );
-    result->y = fabsf4( pnt->y );
-    result->z = fabsf4( pnt->z );
-}
-
-static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = copysignf4( pnt0->x, pnt1->x );
-    result->y = copysignf4( pnt0->y, pnt1->y );
-    result->z = copysignf4( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = fmaxf4( pnt0->x, pnt1->x );
-    result->y = fmaxf4( pnt0->y, pnt1->y );
-    result->z = fmaxf4( pnt0->z, pnt1->z );
-}
-
-static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( pnt->x, pnt->y );
-    result = fmaxf4( pnt->z, result );
-    return result;
-}
-
-static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = fminf4( pnt0->x, pnt1->x );
-    result->y = fminf4( pnt0->y, pnt1->y );
-    result->z = fminf4( pnt0->z, pnt1->z );
-}
-
-static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fminf4( pnt->x, pnt->y );
-    result = fminf4( pnt->z, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = spu_add( pnt->x, pnt->y );
-    result = spu_add( result, pnt->z );
-    return result;
-}
-
-static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
-{
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
-{
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 result;
-    result = spu_mul( pnt->x, unitVec->x );
-    result = spu_add( result, spu_mul( pnt->y, unitVec->y ) );
-    result = spu_add( result, spu_mul( pnt->z, unitVec->z ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathSoaV3LengthSqr( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathSoaV3Length( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathSoaV3LengthSqr( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathSoaV3Length( &tmpV3_0 );
-}
-
-static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
-{
-    result->x = spu_sel( pnt0->x, pnt1->x, select1 );
-    result->y = spu_sel( pnt0->y, pnt1->y, select1 );
-    result->z = spu_sel( pnt0->z, pnt1->z, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
-{
-    VmathPoint3 vec0, vec1, vec2, vec3;
-    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathP3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathP3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathP3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathP3Print( &vec3 );
-}
-
-static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
-{
-    VmathPoint3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathP3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathP3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathP3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathP3Print( &vec3 );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_C_H
+#define _VECTORMATH_VEC_SOA_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
+{
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
+{
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 vec128 = vec->vec128;
+    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
+}
+
+static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
+    tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
+    tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
+    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
+    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
+    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
+}
+
+static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
+}
+
+static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
+    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathSoaV3Add( result, vec0, &tmpV3_1 );
+}
+
+static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = recipf4( sinf4( angle ) );
+    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
+    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
+    vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
+    vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
+    vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
+}
+
+static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
+{
+    vec_float4 tmp0, tmp1;
+    tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
+    vmathV3MakeFrom128( result0, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_XAYB ) );
+    vmathV3MakeFrom128( result1, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_ZBW0 ) );
+    vmathV3MakeFrom128( result2, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_XCY0 ) );
+    vmathV3MakeFrom128( result3, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_ZDW0 ) );
+}
+
+static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
+{
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
+    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
+    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
+    vmathSoaV3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
+    vmathSoaV3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
+    vmathSoaV3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
+}
+
+static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
+    zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
+    yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
+    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
+    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
+    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathSoaV3StoreXYZArray( vec0, xyz0 );
+    vmathSoaV3StoreXYZArray( vec1, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
+{
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
+{
+    return vec->x;
+}
+
+static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
+{
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
+{
+    return vec->y;
+}
+
+static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
+{
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
+{
+    return vec->z;
+}
+
+static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
+{
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
+{
+    return *(&vec->x + idx);
+}
+
+static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = spu_add( vec0->x, vec1->x );
+    result->y = spu_add( vec0->y, vec1->y );
+    result->z = spu_add( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = spu_sub( vec0->x, vec1->x );
+    result->y = spu_sub( vec0->y, vec1->y );
+    result->z = spu_sub( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
+{
+    result->x = spu_add( vec->x, pnt1->x );
+    result->y = spu_add( vec->y, pnt1->y );
+    result->z = spu_add( vec->z, pnt1->z );
+}
+
+static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
+{
+    result->x = spu_mul( vec->x, scalar );
+    result->y = spu_mul( vec->y, scalar );
+    result->z = spu_mul( vec->z, scalar );
+}
+
+static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
+{
+    result->x = divf4( vec->x, scalar );
+    result->y = divf4( vec->y, scalar );
+    result->z = divf4( vec->z, scalar );
+}
+
+static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = negatef4( vec->x );
+    result->y = negatef4( vec->y );
+    result->z = negatef4( vec->z );
+}
+
+static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = spu_mul( vec0->x, vec1->x );
+    result->y = spu_mul( vec0->y, vec1->y );
+    result->z = spu_mul( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = divf4( vec0->x, vec1->x );
+    result->y = divf4( vec0->y, vec1->y );
+    result->z = divf4( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = recipf4( vec->x );
+    result->y = recipf4( vec->y );
+    result->z = recipf4( vec->z );
+}
+
+static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = sqrtf4( vec->x );
+    result->y = sqrtf4( vec->y );
+    result->z = sqrtf4( vec->z );
+}
+
+static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = rsqrtf4( vec->x );
+    result->y = rsqrtf4( vec->y );
+    result->z = rsqrtf4( vec->z );
+}
+
+static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = fabsf4( vec->x );
+    result->y = fabsf4( vec->y );
+    result->z = fabsf4( vec->z );
+}
+
+static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = copysignf4( vec0->x, vec1->x );
+    result->y = copysignf4( vec0->y, vec1->y );
+    result->z = copysignf4( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = fmaxf4( vec0->x, vec1->x );
+    result->y = fmaxf4( vec0->y, vec1->y );
+    result->z = fmaxf4( vec0->z, vec1->z );
+}
+
+static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
+{
+    vec_float4 result;
+    result = fmaxf4( vec->x, vec->y );
+    result = fmaxf4( vec->z, result );
+    return result;
+}
+
+static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = fminf4( vec0->x, vec1->x );
+    result->y = fminf4( vec0->y, vec1->y );
+    result->z = fminf4( vec0->z, vec1->z );
+}
+
+static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
+{
+    vec_float4 result;
+    result = fminf4( vec->x, vec->y );
+    result = fminf4( vec->z, result );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
+{
+    vec_float4 result;
+    result = spu_add( vec->x, vec->y );
+    result = spu_add( result, vec->z );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vec_float4 result;
+    result = spu_mul( vec0->x, vec1->x );
+    result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
+    result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
+{
+    vec_float4 result;
+    result = spu_mul( vec->x, vec->x );
+    result = spu_add( result, spu_mul( vec->y, vec->y ) );
+    result = spu_add( result, spu_mul( vec->z, vec->z ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
+{
+    return sqrtf4( vmathSoaV3LengthSqr( vec ) );
+}
+
+static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    vec_float4 lenSqr, lenInv;
+    lenSqr = vmathSoaV3LengthSqr( vec );
+    lenInv = rsqrtf4( lenSqr );
+    result->x = spu_mul( vec->x, lenInv );
+    result->y = spu_mul( vec->y, lenInv );
+    result->z = spu_mul( vec->z, lenInv );
+}
+
+static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vec_float4 tmpX, tmpY, tmpZ;
+    tmpX = spu_sub( spu_mul( vec0->y, vec1->z ), spu_mul( vec0->z, vec1->y ) );
+    tmpY = spu_sub( spu_mul( vec0->z, vec1->x ), spu_mul( vec0->x, vec1->z ) );
+    tmpZ = spu_sub( spu_mul( vec0->x, vec1->y ), spu_mul( vec0->y, vec1->x ) );
+    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
+{
+    result->x = spu_sel( vec0->x, vec1->x, select1 );
+    result->y = spu_sel( vec0->y, vec1->y, select1 );
+    result->z = spu_sel( vec0->z, vec1->z, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
+{
+    VmathVector3 vec0, vec1, vec2, vec3;
+    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathV3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV3Print( &vec3 );
+}
+
+static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
+{
+    VmathVector3 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathV3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV3Print( &vec3 );
+}
+
+#endif
+
+static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
+{
+    vmathSoaV4SetXYZ( result, xyz );
+    vmathSoaV4SetW( result, _w );
+}
+
+static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = spu_splats(0.0f);
+}
+
+static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
+{
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+    result->w = spu_splats(1.0f);
+}
+
+static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
+{
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
+{
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
+    vec_float4 vec128 = vec->vec128;
+    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
+    result->w = spu_shuffle( vec128, vec128, shuffle_wwww );
+}
+
+static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
+    tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
+    tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
+    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
+    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
+    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
+    result->w = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
+}
+
+static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
+{
+    vmathSoaV4MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
+{
+    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
+{
+    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
+}
+
+static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
+{
+    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    VmathSoaVector4 tmpV4_0, tmpV4_1;
+    vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
+    vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
+    vmathSoaV4Add( result, vec0, &tmpV4_1 );
+}
+
+static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
+{
+    VmathSoaVector4 tmpV4_0, tmpV4_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = recipf4( sinf4( angle ) );
+    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
+    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
+    vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
+    vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
+    vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
+}
+
+static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_XAYB );
+    tmp2 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
+    tmp3 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_ZCWD );
+    vmathV4MakeFrom128( result0, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
+    vmathV4MakeFrom128( result1, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
+    vmathV4MakeFrom128( result2, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
+    vmathV4MakeFrom128( result3, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
+}
+
+static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
+{
+    VmathVector4 v0, v1, v2, v3;
+    vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
+    twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
+    twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
+}
+
+static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
+{
+    vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
+}
+
+static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
+{
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
+{
+    return vec->x;
+}
+
+static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
+{
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
+{
+    return vec->y;
+}
+
+static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
+{
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
+{
+    return vec->z;
+}
+
+static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
+{
+    result->w = _w;
+}
+
+static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
+{
+    return vec->w;
+}
+
+static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
+{
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
+{
+    return *(&vec->x + idx);
+}
+
+static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    result->x = spu_add( vec0->x, vec1->x );
+    result->y = spu_add( vec0->y, vec1->y );
+    result->z = spu_add( vec0->z, vec1->z );
+    result->w = spu_add( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    result->x = spu_sub( vec0->x, vec1->x );
+    result->y = spu_sub( vec0->y, vec1->y );
+    result->z = spu_sub( vec0->z, vec1->z );
+    result->w = spu_sub( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
+{
+    result->x = spu_mul( vec->x, scalar );
+    result->y = spu_mul( vec->y, scalar );
+    result->z = spu_mul( vec->z, scalar );
+    result->w = spu_mul( vec->w, scalar );
+}
+
+static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
+{
+    result->x = divf4( vec->x, scalar );
+    result->y = divf4( vec->y, scalar );
+    result->z = divf4( vec->z, scalar );
+    result->w = divf4( vec->w, scalar );
+}
+
+static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    result->x = negatef4( vec->x );
+    result->y = negatef4( vec->y );
+    result->z = negatef4( vec->z );
+    result->w = negatef4( vec->w );
+}
+
+static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    result->x = spu_mul( vec0->x, vec1->x );
+    result->y = spu_mul( vec0->y, vec1->y );
+    result->z = spu_mul( vec0->z, vec1->z );
+    result->w = spu_mul( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    result->x = divf4( vec0->x, vec1->x );
+    result->y = divf4( vec0->y, vec1->y );
+    result->z = divf4( vec0->z, vec1->z );
+    result->w = divf4( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    result->x = recipf4( vec->x );
+    result->y = recipf4( vec->y );
+    result->z = recipf4( vec->z );
+    result->w = recipf4( vec->w );
+}
+
+static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    result->x = sqrtf4( vec->x );
+    result->y = sqrtf4( vec->y );
+    result->z = sqrtf4( vec->z );
+    result->w = sqrtf4( vec->w );
+}
+
+static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    result->x = rsqrtf4( vec->x );
+    result->y = rsqrtf4( vec->y );
+    result->z = rsqrtf4( vec->z );
+    result->w = rsqrtf4( vec->w );
+}
+
+static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    result->x = fabsf4( vec->x );
+    result->y = fabsf4( vec->y );
+    result->z = fabsf4( vec->z );
+    result->w = fabsf4( vec->w );
+}
+
+static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    result->x = copysignf4( vec0->x, vec1->x );
+    result->y = copysignf4( vec0->y, vec1->y );
+    result->z = copysignf4( vec0->z, vec1->z );
+    result->w = copysignf4( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    result->x = fmaxf4( vec0->x, vec1->x );
+    result->y = fmaxf4( vec0->y, vec1->y );
+    result->z = fmaxf4( vec0->z, vec1->z );
+    result->w = fmaxf4( vec0->w, vec1->w );
+}
+
+static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
+{
+    vec_float4 result;
+    result = fmaxf4( vec->x, vec->y );
+    result = fmaxf4( vec->z, result );
+    result = fmaxf4( vec->w, result );
+    return result;
+}
+
+static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Member by member: result = fminf4( vec0 member, vec1 member ) for x, y, z and w. */
+    result->x = fminf4( vec0->x, vec1->x );
+    result->y = fminf4( vec0->y, vec1->y );
+    result->z = fminf4( vec0->z, vec1->z );
+    result->w = fminf4( vec0->w, vec1->w );
+}
+
+static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
+{   /* Returns the fminf4() reduction across the four members x, y, z, w of *vec. */
+    vec_float4 result;
+    result = fminf4( vec->x, vec->y );
+    result = fminf4( vec->z, result );    /* fold z into the running minimum */
+    result = fminf4( vec->w, result );    /* fold w into the running minimum */
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
+{   /* Returns x + y + z + w of *vec, accumulated left to right with spu_add. */
+    vec_float4 result;
+    result = spu_add( vec->x, vec->y );
+    result = spu_add( result, vec->z );
+    result = spu_add( result, vec->w );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Returns the 4-component dot product x0*x1 + y0*y1 + z0*z1 + w0*w1 of *vec0 and *vec1. */
+    vec_float4 result;
+    result = spu_mul( vec0->x, vec1->x );
+    result = spu_add( result, spu_mul( vec0->y, vec1->y ) );    /* separate multiply then add, not a fused op */
+    result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
+    result = spu_add( result, spu_mul( vec0->w, vec1->w ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
+{   /* Returns x*x + y*y + z*z + w*w of *vec (the squared length). */
+    vec_float4 result;
+    result = spu_mul( vec->x, vec->x );
+    result = spu_add( result, spu_mul( vec->y, vec->y ) );
+    result = spu_add( result, spu_mul( vec->z, vec->z ) );
+    result = spu_add( result, spu_mul( vec->w, vec->w ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
+{   /* Returns sqrtf4() of vmathSoaV4LengthSqr( vec ). */
+    return sqrtf4( vmathSoaV4LengthSqr( vec ) );
+}
+
+static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{   /* Writes *vec scaled by rsqrtf4( squared length ) into *result; there is no zero-length guard here. */
+    vec_float4 lenSqr, lenInv;
+    lenSqr = vmathSoaV4LengthSqr( vec );
+    lenInv = rsqrtf4( lenSqr );    /* one reciprocal square root shared by all four members */
+    result->x = spu_mul( vec->x, lenInv );
+    result->y = spu_mul( vec->y, lenInv );
+    result->z = spu_mul( vec->z, lenInv );
+    result->w = spu_mul( vec->w, lenInv );
+}
+
+static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
+{   /* Applies spu_sel with the same mask to every member; per the SPU intrinsic, set mask bits take vec1, clear bits take vec0. */
+    result->x = spu_sel( vec0->x, vec1->x, select1 );
+    result->y = spu_sel( vec0->y, vec1->y, select1 );
+    result->z = spu_sel( vec0->z, vec1->z, select1 );
+    result->w = spu_sel( vec0->w, vec1->w, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
+{   /* Debug helper (only compiled under _VECTORMATH_DEBUG): prints *vec as four AoS vectors, one per slot. */
+    VmathVector4 vec0, vec1, vec2, vec3;
+    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );    /* split the SoA value into four VmathVector4 */
+    printf("slot 0:\n");
+    vmathV4Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV4Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV4Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV4Print( &vec3 );
+}
+
+static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
+{   /* Debug helper (only compiled under _VECTORMATH_DEBUG): prints "name:" and then *vec slot by slot. */
+    VmathVector4 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );    /* name is passed to %s as-is, so it must be a valid C string */
+    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathV4Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV4Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV4Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV4Print( &vec3 );
+}
+
+#endif
+
+static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Copies the x, y and z members of *pnt into *result. */
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{   /* Sets the x, y and z members of *result to _x, _y and _z respectively. */
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
+{   /* Builds a point from a vector by copying its x, y and z members unchanged. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
+{   /* Sets all three members (x, y, z) of *result to the same vec_float4 value. */
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
+{   /* Replicates one AoS point into every word of the SoA point: x gets word 0, y word 1, z word 2 of pnt->vec128. */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);    /* bytes 0-3 (word 0) repeated in each word */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);    /* bytes 4-7 (word 1) repeated in each word */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);    /* bytes 8-11 (word 2) repeated in each word */
+    vec_float4 vec128 = pnt->vec128;
+    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
+}
+
+static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
+{   /* Transposes four AoS points into one SoA point; word i of each result member comes from pnt<i>. Lane notes assume the x,y,z,w / a,b,c,d naming of _VECTORMATH_SHUF_*. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_XAYB );    /* p0.x p2.x p0.y p2.y */
+    tmp1 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_XAYB );    /* p1.x p3.x p1.y p3.y */
+    tmp2 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_ZCWD );    /* p0.z p2.z p0.w p2.w */
+    tmp3 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZCWD );    /* p1.z p3.z p1.w p3.w */
+    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );    /* p0.x p1.x p2.x p3.x */
+    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );    /* p0.y p1.y p2.y p3.y */
+    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );    /* p0.z p1.z p2.z p3.z; the w words are dropped */
+}
+
+static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Linear interpolation: *result = *pnt0 + ( *pnt1 - *pnt0 ) * t; t is not clamped here. */
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );    /* tmpV3_0 = pnt1 - pnt0 */
+    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
+}
+
+static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
+{   /* Splits one SoA point into four AoS points, result<i> taking word i of x, y and z. Lane notes assume the x,y,z,w / a,b,c,d naming of _VECTORMATH_SHUF_*. */
+    vec_float4 tmp0, tmp1;
+    tmp0 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_XAYB );    /* x0 z0 x1 z1 */
+    tmp1 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_ZCWD );    /* x2 z2 x3 z3 */
+    vmathP3MakeFrom128( result0, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_XAYB ) );    /* x0 y0 z0 y1: 4th word is y1, not the _0 pattern used below */
+    vmathP3MakeFrom128( result1, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_ZBW0 ) );    /* x1 y1 z1, 4th word from the _0 pattern (presumably zero) */
+    vmathP3MakeFrom128( result2, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_XCY0 ) );    /* x2 y2 z2, 4th word from the _0 pattern */
+    vmathP3MakeFrom128( result3, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_ZDW0 ) );    /* x3 y3 z3, 4th word from the _0 pattern */
+}
+
+static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
+{   /* Reads threeQuads[0..2] (12 floats; the local names indicate packed x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3) and de-interleaves them into vec->x, vec->y, vec->z. */
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );    /* x0 y0 x2 y2 */
+    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );    /* z2 x3 z0 x1 */
+    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );    /* y1 z1 y3 z3 */
+    vmathSoaP3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );    /* x0 x1 x2 x3 */
+    vmathSoaP3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );    /* y0 y1 y2 y3 */
+    vmathSoaP3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );    /* z0 z1 z2 z3 */
+}
+
+static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
+{   /* Interleaves vec->x, vec->y, vec->z into threeQuads[0..2] as x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3 (lane notes assume the _VECTORMATH_SHUF_* naming). */
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );    /* x0 y0 x2 y2 */
+    zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );    /* z2 x3 z0 x1 */
+    yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );    /* y1 z1 y3 z3 */
+    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );    /* x0 y0 z0 x1 */
+    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );    /* y1 z1 x2 y2 */
+    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );    /* z2 x3 y3 z3 */
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
+{   /* Interleaves *pnt0 and *pnt1 into three float quads each, then packs the six quads pairwise into threeQuads[0..2]. */
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathSoaP3StoreXYZArray( pnt0, xyz0 );
+    vmathSoaP3StoreXYZArray( pnt1, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);    /* helper defined elsewhere; presumably float -> half conversion (TODO confirm) */
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);    /* this quad straddles the end of pnt0 and the start of pnt1 */
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
+{   /* Overwrites only the x member of *result. */
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
+{   /* Returns the x member of *pnt. */
+    return pnt->x;
+}
+
+static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
+{   /* Overwrites only the y member of *result. */
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
+{   /* Returns the y member of *pnt. */
+    return pnt->y;
+}
+
+static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
+{   /* Overwrites only the z member of *result. */
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
+{   /* Returns the z member of *pnt. */
+    return pnt->z;
+}
+
+static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
+{   /* Stores value at offset idx from the x member; idx is not range-checked. NOTE(review): relies on x, y, z being adjacent vec_float4 members (struct not visible here), i.e. idx 0..2. */
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
+{   /* Reads the vec_float4 at offset idx from the x member; idx is not range-checked. NOTE(review): relies on x, y, z being adjacent members (struct not visible here), i.e. idx 0..2. */
+    return *(&pnt->x + idx);
+}
+
+static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Point minus point gives a vector: *result = *pnt0 - *pnt1, member by member. */
+    result->x = spu_sub( pnt0->x, pnt1->x );
+    result->y = spu_sub( pnt0->y, pnt1->y );
+    result->z = spu_sub( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
+{   /* Point plus vector gives a point: *result = *pnt + *vec1, member by member. */
+    result->x = spu_add( pnt->x, vec1->x );
+    result->y = spu_add( pnt->y, vec1->y );
+    result->z = spu_add( pnt->z, vec1->z );
+}
+
+static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
+{   /* Point minus vector gives a point: *result = *pnt - *vec1, member by member. */
+    result->x = spu_sub( pnt->x, vec1->x );
+    result->y = spu_sub( pnt->y, vec1->y );
+    result->z = spu_sub( pnt->z, vec1->z );
+}
+
+static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Member by member product: *result = *pnt0 * *pnt1 for x, y and z. */
+    result->x = spu_mul( pnt0->x, pnt1->x );
+    result->y = spu_mul( pnt0->y, pnt1->y );
+    result->z = spu_mul( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Member by member quotient via divf4: *result = *pnt0 / *pnt1 for x, y and z; no zero-divisor check here. */
+    result->x = divf4( pnt0->x, pnt1->x );
+    result->y = divf4( pnt0->y, pnt1->y );
+    result->z = divf4( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Writes recipf4() of each member (x, y, z) of *pnt into the same member of *result. */
+    result->x = recipf4( pnt->x );
+    result->y = recipf4( pnt->y );
+    result->z = recipf4( pnt->z );
+}
+
+static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Writes sqrtf4() of each member (x, y, z) of *pnt into the same member of *result. */
+    result->x = sqrtf4( pnt->x );
+    result->y = sqrtf4( pnt->y );
+    result->z = sqrtf4( pnt->z );
+}
+
+static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Writes rsqrtf4() of each member (x, y, z) of *pnt into the same member of *result. */
+    result->x = rsqrtf4( pnt->x );
+    result->y = rsqrtf4( pnt->y );
+    result->z = rsqrtf4( pnt->z );
+}
+
+static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Writes fabsf4() of each member (x, y, z) of *pnt into the same member of *result. */
+    result->x = fabsf4( pnt->x );
+    result->y = fabsf4( pnt->y );
+    result->z = fabsf4( pnt->z );
+}
+
+static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Member by member: result = copysignf4( pnt0 member, pnt1 member ); pnt0 is passed first, pnt1 second. */
+    result->x = copysignf4( pnt0->x, pnt1->x );
+    result->y = copysignf4( pnt0->y, pnt1->y );
+    result->z = copysignf4( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Member by member: result = fmaxf4( pnt0 member, pnt1 member ) for x, y and z. */
+    result->x = fmaxf4( pnt0->x, pnt1->x );
+    result->y = fmaxf4( pnt0->y, pnt1->y );
+    result->z = fmaxf4( pnt0->z, pnt1->z );
+}
+
+static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
+{   /* Returns the fmaxf4() reduction across the three members x, y, z of *pnt. */
+    vec_float4 result;
+    result = fmaxf4( pnt->x, pnt->y );
+    result = fmaxf4( pnt->z, result );    /* fold z into the running maximum */
+    return result;
+}
+
+static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Member by member: result = fminf4( pnt0 member, pnt1 member ) for x, y and z. */
+    result->x = fminf4( pnt0->x, pnt1->x );
+    result->y = fminf4( pnt0->y, pnt1->y );
+    result->z = fminf4( pnt0->z, pnt1->z );
+}
+
+static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
+{   /* Returns the fminf4() reduction across the three members x, y, z of *pnt. */
+    vec_float4 result;
+    result = fminf4( pnt->x, pnt->y );
+    result = fminf4( pnt->z, result );    /* fold z into the running minimum */
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
+{   /* Returns x + y + z of *pnt, accumulated left to right with spu_add. */
+    vec_float4 result;
+    result = spu_add( pnt->x, pnt->y );
+    result = spu_add( result, pnt->z );
+    return result;
+}
+
+static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
+{   /* Uniform scale: multiplies x, y and z of *pnt by the same scaleVal and stores the product in *result. */
+    VmathSoaPoint3 tmpP3_0;
+    vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );    /* temporary point with scaleVal in all three members */
+    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
+}
+
+static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
+{   /* Per-axis scale: multiplies x, y, z of *pnt by the matching member of *scaleVec and stores the product in *result. */
+    VmathSoaPoint3 tmpP3_0;
+    vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );    /* copy the vector into a point so MulPerElem can take it */
+    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
+}
+
+static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
+{   /* Returns the dot product px*ux + py*uy + pz*uz of *pnt with *unitVec; unitVec is used as given (not normalized here). */
+    vec_float4 result;
+    result = spu_mul( pnt->x, unitVec->x );
+    result = spu_add( result, spu_mul( pnt->y, unitVec->y ) );
+    result = spu_add( result, spu_mul( pnt->z, unitVec->z ) );
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
+{   /* Returns vmathSoaV3LengthSqr() of the point reinterpreted as a vector. */
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
+    return vmathSoaV3LengthSqr( &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
+{   /* Returns vmathSoaV3Length() of the point reinterpreted as a vector. */
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
+    return vmathSoaV3Length( &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Returns vmathSoaV3LengthSqr() of the difference vector between the two points. */
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );    /* tmpV3_0 = pnt1 - pnt0 */
+    return vmathSoaV3LengthSqr( &tmpV3_0 );
+}
+
+static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Returns vmathSoaV3Length() of the difference vector between the two points. */
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );    /* tmpV3_0 = pnt1 - pnt0 */
+    return vmathSoaV3Length( &tmpV3_0 );
+}
+
+static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
+{   /* Applies spu_sel with the same mask to every member; per the SPU intrinsic, set mask bits take pnt1, clear bits take pnt0. */
+    result->x = spu_sel( pnt0->x, pnt1->x, select1 );
+    result->y = spu_sel( pnt0->y, pnt1->y, select1 );
+    result->z = spu_sel( pnt0->z, pnt1->z, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
+{   /* Debug helper (only compiled under _VECTORMATH_DEBUG): prints *pnt as four AoS points, one per slot. */
+    VmathPoint3 vec0, vec1, vec2, vec3;
+    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );    /* split the SoA value into four VmathPoint3 */
+    printf("slot 0:\n");
+    vmathP3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathP3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathP3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathP3Print( &vec3 );
+}
+
+static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
+{   /* Debug helper (only compiled under _VECTORMATH_DEBUG): prints "name:" and then *pnt slot by slot. */
+    VmathPoint3 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );    /* name is passed to %s as-is, so it must be a valid C string */
+    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
+    printf("slot 0:\n");
+    vmathP3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathP3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathP3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathP3Print( &vec3 );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa_v.h
index 788fa5c39..560356a77 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa_v.h
@@ -1,962 +1,962 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_V_C_H
-#define _VECTORMATH_VEC_SOA_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromAos(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
-{
-    vmathSoaV3Get4Aos(&vec, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
-{
-    vmathSoaV3LoadXYZArray(vec, threeQuads);
-}
-
-static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads )
-{
-    vmathSoaV3StoreXYZArray(&vec, threeQuads);
-}
-
-static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads )
-{
-    vmathSoaV3StoreHalfFloats(&vec0, &vec1, threeQuads);
-}
-
-static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 _x )
-{
-    vmathSoaV3SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetX(&vec);
-}
-
-static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 _y )
-{
-    vmathSoaV3SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetY(&vec);
-}
-
-static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 _z )
-{
-    vmathSoaV3SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetZ(&vec);
-}
-
-static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value )
-{
-    vmathSoaV3SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx )
-{
-    return vmathSoaV3GetElem(&vec, idx);
-}
-
-static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3MaxElem(&vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3MinElem(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3Sum(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    return vmathSoaV3Dot(&vec0, &vec1);
-}
-
-static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3LengthSqr(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3Length(&vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV3Print_V( VmathSoaVector3 vec )
-{
-    vmathSoaV3Print(&vec);
-}
-
-static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name )
-{
-    vmathSoaV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromAos(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
-{
-    vmathSoaV4Get4Aos(&vec, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads )
-{
-    vmathSoaV4StoreHalfFloats(&vec, twoQuads);
-}
-
-static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec )
-{
-    vmathSoaV4SetXYZ(result, &vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 _x )
-{
-    vmathSoaV4SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetX(&vec);
-}
-
-static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 _y )
-{
-    vmathSoaV4SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetY(&vec);
-}
-
-static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 _z )
-{
-    vmathSoaV4SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetZ(&vec);
-}
-
-static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 _w )
-{
-    vmathSoaV4SetW(result, _w);
-}
-
-static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetW(&vec);
-}
-
-static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value )
-{
-    vmathSoaV4SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx )
-{
-    return vmathSoaV4GetElem(&vec, idx);
-}
-
-static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4MaxElem(&vec);
-}
-
-static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4MinElem(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4Sum(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    return vmathSoaV4Dot(&vec0, &vec1);
-}
-
-static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4LengthSqr(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4Length(&vec);
-}
-
-static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV4Print_V( VmathSoaVector4 vec )
-{
-    vmathSoaV4Print(&vec);
-}
-
-static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name )
-{
-    vmathSoaV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromAos(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFrom4Aos(&result, &pnt0, &pnt1, &pnt2, &pnt3);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
-{
-    vmathSoaP3Get4Aos(&pnt, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
-{
-    vmathSoaP3LoadXYZArray(vec, threeQuads);
-}
-
-static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 vec, vec_float4 *threeQuads )
-{
-    vmathSoaP3StoreXYZArray(&vec, threeQuads);
-}
-
-static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads )
-{
-    vmathSoaP3StoreHalfFloats(&pnt0, &pnt1, threeQuads);
-}
-
-static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 _x )
-{
-    vmathSoaP3SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetX(&pnt);
-}
-
-static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 _y )
-{
-    vmathSoaP3SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetY(&pnt);
-}
-
-static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 _z )
-{
-    vmathSoaP3SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetZ(&pnt);
-}
-
-static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value )
-{
-    vmathSoaP3SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx )
-{
-    return vmathSoaP3GetElem(&pnt, idx);
-}
-
-static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3MaxElem(&pnt);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3MinElem(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3Sum(&pnt);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec )
-{
-    return vmathSoaP3Projection(&pnt, &unitVec);
-}
-
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3DistSqrFromOrigin(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3DistFromOrigin(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    return vmathSoaP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    return vmathSoaP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt )
-{
-    vmathSoaP3Print(&pnt);
-}
-
-static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name )
-{
-    vmathSoaP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_V_C_H
+#define _VECTORMATH_VEC_SOA_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromAos(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
+{
+    vmathSoaV3Get4Aos(&vec, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
+{
+    vmathSoaV3LoadXYZArray(vec, threeQuads);
+}
+
+static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads )
+{
+    vmathSoaV3StoreXYZArray(&vec, threeQuads);
+}
+
+static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads )
+{
+    vmathSoaV3StoreHalfFloats(&vec0, &vec1, threeQuads);
+}
+
+static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 _x )
+{
+    vmathSoaV3SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetX(&vec);
+}
+
+static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 _y )
+{
+    vmathSoaV3SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetY(&vec);
+}
+
+static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 _z )
+{
+    vmathSoaV3SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetZ(&vec);
+}
+
+static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value )
+{
+    vmathSoaV3SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx )
+{
+    return vmathSoaV3GetElem(&vec, idx);
+}
+
+static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3MaxElem(&vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3MinElem(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3Sum(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    return vmathSoaV3Dot(&vec0, &vec1);
+}
+
+static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3LengthSqr(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3Length(&vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV3Print_V( VmathSoaVector3 vec )
+{
+    vmathSoaV3Print(&vec);
+}
+
+static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name )
+{
+    vmathSoaV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromAos(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeWAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
+{
+    vmathSoaV4Get4Aos(&vec, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads )
+{
+    vmathSoaV4StoreHalfFloats(&vec, twoQuads);
+}
+
+static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec )
+{
+    vmathSoaV4SetXYZ(result, &vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 _x )
+{
+    vmathSoaV4SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetX(&vec);
+}
+
+static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 _y )
+{
+    vmathSoaV4SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetY(&vec);
+}
+
+static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 _z )
+{
+    vmathSoaV4SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetZ(&vec);
+}
+
+static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 _w )
+{
+    vmathSoaV4SetW(result, _w);
+}
+
+static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetW(&vec);
+}
+
+static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value )
+{
+    vmathSoaV4SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx )
+{
+    return vmathSoaV4GetElem(&vec, idx);
+}
+
+static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4MaxElem(&vec);
+}
+
+static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4MinElem(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4Sum(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    return vmathSoaV4Dot(&vec0, &vec1);
+}
+
+static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4LengthSqr(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4Length(&vec);
+}
+
+static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV4Print_V( VmathSoaVector4 vec )
+{
+    vmathSoaV4Print(&vec);
+}
+
+static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name )
+{
+    vmathSoaV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromAos(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFrom4Aos(&result, &pnt0, &pnt1, &pnt2, &pnt3);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Lerp(&result, t, &pnt0, &pnt1);
+    return result;
+}
+
+static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
+{
+    vmathSoaP3Get4Aos(&pnt, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
+{
+    vmathSoaP3LoadXYZArray(vec, threeQuads);
+}
+
+static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 vec, vec_float4 *threeQuads )
+{
+    vmathSoaP3StoreXYZArray(&vec, threeQuads);
+}
+
+static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads )
+{
+    vmathSoaP3StoreHalfFloats(&pnt0, &pnt1, threeQuads);
+}
+
+static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 _x )
+{
+    vmathSoaP3SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetX(&pnt);
+}
+
+static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 _y )
+{
+    vmathSoaP3SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetY(&pnt);
+}
+
+static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 _z )
+{
+    vmathSoaP3SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetZ(&pnt);
+}
+
+static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value )
+{
+    vmathSoaP3SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx )
+{
+    return vmathSoaP3GetElem(&pnt, idx);
+}
+
+static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaP3Sub(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3AddV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3SubV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MulPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3DivPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3RecipPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3SqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3RsqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3AbsPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3CopySignPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MaxPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3MaxElem(&pnt);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MinPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3MinElem(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3Sum(&pnt);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Scale(&result, &pnt, scaleVal);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3NonUniformScale(&result, &pnt, &scaleVec);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec )
+{
+    return vmathSoaP3Projection(&pnt, &unitVec);
+}
+
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3DistSqrFromOrigin(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3DistFromOrigin(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    return vmathSoaP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    return vmathSoaP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Select(&result, &pnt0, &pnt1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt )
+{
+    vmathSoaP3Print(&pnt);
+}
+
+static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name )
+{
+    vmathSoaP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos.h
index 5fa9950e8..3bd4e0fe2 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos.h
@@ -1,1951 +1,1951 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_SPU_H
-#define _VECTORMATH_AOS_C_SPU_H
-
-#include <math.h>
-#include <simdmath.h>
-#include <stdio.h>
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    vec_float4 vec128;
-} VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    vec_float4 vec128;
-} VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    vec_float4 vec128;
-} VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    vec_float4 vec128;
-} VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
-
-/*
- * Set vector float data in a 3-D vector
- */
-static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D vector
- */
-static inline vec_float4 vmathV3Get128( const VmathVector3 *vec );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX( const VmathVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY( const VmathVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ( const VmathVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV3MakeXAxis( VmathVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV3MakeYAxis( VmathVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV3MakeZAxis( VmathVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem( const VmathVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem( const VmathVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum( const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr( const VmathVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length( const VmathVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- * NOTE: 
- * Slower than column post-multiply.
- */
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D vectors in three quadwords
- */
-static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D vectors as half-floats
- */
-static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print( const VmathVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
-
-/*
- * Set vector float data in a 4-D vector
- */
-static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 4-D vector
- */
-static inline vec_float4 vmathV4Get128( const VmathVector4 *vec );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX( const VmathVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY( const VmathVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ( const VmathVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW( const VmathVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV4MakeXAxis( VmathVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV4MakeYAxis( VmathVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV4MakeZAxis( VmathVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathV4MakeWAxis( VmathVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem( const VmathVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem( const VmathVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum( const VmathVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr( const VmathVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length( const VmathVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
-
-/*
- * Store four 4-D vectors as half-floats
- */
-static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print( const VmathVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
-
-/*
- * Set vector float data in a 3-D point
- */
-static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D point
- */
-static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX( const VmathPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY( const VmathPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ( const VmathPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem( const VmathPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum( const VmathPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D points in three quadwords
- */
-static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D points as half-floats
- */
-static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print( const VmathPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
-
-/*
- * Set vector float data in a quaternion
- */
-static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a quaternion
- */
-static inline vec_float4 vmathQGet128( const VmathQuat *quat );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX( const VmathQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY( const VmathQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ( const VmathQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW( const VmathQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem( const VmathQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathQMakeIdentity( VmathQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm( const VmathQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength( const VmathQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint( const VmathQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints( const VmathQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant( const VmathMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print( const VmathMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant( const VmathMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print( const VmathMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathT3MakeIdentity( VmathTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print( const VmathTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_SPU_H
+#define _VECTORMATH_AOS_C_SPU_H
+
+#include <math.h>
+#include <simdmath.h>
+#include <stdio.h>
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format:
+ * the whole vector is held in one vec_float4 quadword (vec128). */
+typedef struct _VmathVector3
+{
+    vec_float4 vec128;
+} VmathVector3;
+
+/* A 4-D vector in array-of-structures format:
+ * the whole vector is held in one vec_float4 quadword (vec128). */
+typedef struct _VmathVector4
+{
+    vec_float4 vec128;
+} VmathVector4;
+
+/* A 3-D point in array-of-structures format:
+ * the whole point is held in one vec_float4 quadword (vec128). */
+typedef struct _VmathPoint3
+{
+    vec_float4 vec128;
+} VmathPoint3;
+
+/* A quaternion in array-of-structures format:
+ * the whole quaternion is held in one vec_float4 quadword (vec128). */
+typedef struct _VmathQuat
+{
+    vec_float4 vec128;
+} VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format,
+ * stored as three VmathVector3 columns (col0, col1, col2). */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format,
+ * stored as four VmathVector4 columns (col0 .. col3). */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format,
+ * stored as four VmathVector3 columns (col0 .. col3). */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3;
+} VmathTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
+
+/*
+ * Set vector float data in a 3-D vector
+ */
+static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D vector
+ */
+static inline vec_float4 vmathV3Get128( const VmathVector3 *vec );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX( const VmathVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY( const VmathVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ( const VmathVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV3MakeXAxis( VmathVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV3MakeYAxis( VmathVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV3MakeZAxis( VmathVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem( const VmathVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem( const VmathVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum( const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr( const VmathVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length( const VmathVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ * NOTE: 
+ * Slower than column post-multiply.
+ */
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D vectors in three quadwords
+ */
+static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D vectors as half-floats in three quadwords
+ */
+static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print( const VmathVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
+
+/*
+ * Set vector float data in a 4-D vector
+ */
+static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 4-D vector
+ */
+static inline vec_float4 vmathV4Get128( const VmathVector4 *vec );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX( const VmathVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY( const VmathVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ( const VmathVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW( const VmathVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV4MakeXAxis( VmathVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV4MakeYAxis( VmathVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV4MakeZAxis( VmathVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathV4MakeWAxis( VmathVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem( const VmathVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem( const VmathVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum( const VmathVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr( const VmathVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length( const VmathVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
+
+/*
+ * Store four 4-D vectors as half-floats in two quadwords
+ */
+static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print( const VmathVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
+
+/*
+ * Set vector float data in a 3-D point
+ */
+static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D point
+ */
+static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX( const VmathPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY( const VmathPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ( const VmathPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Add a 3-D point to a 3-D vector
+ */
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem( const VmathPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum( const VmathPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D points in three quadwords
+ */
+static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D points as half-floats
+ */
+static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print( const VmathPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
+
+/*
+ * Set vector float data in a quaternion
+ */
+static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a quaternion
+ */
+static inline vec_float4 vmathQGet128( const VmathQuat *quat );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX( const VmathQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY( const VmathQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ( const VmathQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW( const VmathQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem( const VmathQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathQMakeIdentity( VmathQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion
+ */
+static inline float vmathQNorm( const VmathQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength( const VmathQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint( const VmathQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints( const VmathQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant( const VmathMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print( const VmathMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print( const VmathMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print( const VmathTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos_v.h
index 242d938a0..cca0872f0 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos_v.h
@@ -1,1916 +1,1916 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_V_SPU_H
-#define _VECTORMATH_AOS_C_V_SPU_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    vec_float4 vec128;
-} VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    vec_float4 vec128;
-} VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    vec_float4 vec128;
-} VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    vec_float4 vec128;
-} VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D vector
- */
-static inline vec_float4 vmathV3Get128_V( VmathVector3 vec );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX_V( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY_V( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX_V( VmathVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY_V( VmathVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ_V( VmathVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector3 vmathV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector3 vmathV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector3 vmathV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem_V( VmathVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem_V( VmathVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum_V( VmathVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr_V( VmathVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length_V( VmathVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- * NOTE: 
- * Slower than column post-multiply.
- */
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D vectors in three quadwords
- */
-static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D vectors as half-floats
- */
-static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print_V( VmathVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 4-D vector
- */
-static inline vec_float4 vmathV4Get128_V( VmathVector4 vec );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX_V( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY_V( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW_V( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX_V( VmathVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY_V( VmathVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ_V( VmathVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW_V( VmathVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector4 vmathV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector4 vmathV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector4 vmathV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathVector4 vmathV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem_V( VmathVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem_V( VmathVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum_V( VmathVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr_V( VmathVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length_V( VmathVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
-
-/*
- * Store four 4-D vectors as half-floats
- */
-static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print_V( VmathVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D point
- */
-static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX_V( VmathPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY_V( VmathPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ_V( VmathPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum_V( VmathPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D points in three quadwords
- */
-static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D points as half-floats
- */
-static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print_V( VmathPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a quaternion
- */
-static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a quaternion
- */
-static inline vec_float4 vmathQGet128_V( VmathQuat quat );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX_V( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY_V( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ_V( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW_V( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX_V( VmathQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY_V( VmathQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ_V( VmathQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW_V( VmathQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem_V( VmathQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathQuat vmathQNeg_V( VmathQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathQuat vmathQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathQuat vmathQMakeRotationX_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathQuat vmathQMakeRotationY_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathQuat vmathQMakeRotationZ_V( float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathQuat vmathQConj_V( VmathQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm_V( VmathQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength_V( VmathQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint_V( VmathQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints_V( VmathQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant_V( VmathMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print_V( VmathMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant_V( VmathMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print_V( VmathMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathTransform3 vmathT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print_V( VmathTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_aos.h"
-#include "vec_aos_v.h"
-#include "quat_aos_v.h"
-#include "mat_aos_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_V_SPU_H
+#define _VECTORMATH_AOS_C_V_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format; its data is held in the single vec_float4 member vec128
+ */
+typedef struct _VmathVector3
+{
+    vec_float4 vec128;
+} VmathVector3;
+
+/* A 4-D vector in array-of-structures format; its data is held in the single vec_float4 member vec128
+ */
+typedef struct _VmathVector4
+{
+    vec_float4 vec128;
+} VmathVector4;
+
+/* A 3-D point in array-of-structures format; its data is held in the single vec_float4 member vec128
+ */
+typedef struct _VmathPoint3
+{
+    vec_float4 vec128;
+} VmathPoint3;
+
+/* A quaternion in array-of-structures format; its data is held in the single vec_float4 member vec128
+ */
+typedef struct _VmathQuat
+{
+    vec_float4 vec128;
+} VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format, stored as three VmathVector3 columns (col0, col1, col2)
+ */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format, stored as four VmathVector4 columns (col0, col1, col2, col3)
+ */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format, stored as four VmathVector3 columns (col0, col1, col2, col3)
+ */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3;
+} VmathTransform3;
+
+#endif
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D vector
+ */
+static inline vec_float4 vmathV3Get128_V( VmathVector3 vec );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX_V( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY_V( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX_V( VmathVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY_V( VmathVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ_V( VmathVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
+
+/*
+ * Construct x axis
+ */
+static inline VmathVector3 vmathV3MakeXAxis_V( );
+
+/*
+ * Construct y axis
+ */
+static inline VmathVector3 vmathV3MakeYAxis_V( );
+
+/*
+ * Construct z axis
+ */
+static inline VmathVector3 vmathV3MakeZAxis_V( );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem_V( VmathVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem_V( VmathVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum_V( VmathVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr_V( VmathVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length_V( VmathVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ * NOTE: 
+ * Slower than column post-multiply.
+ */
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D vectors in three quadwords
+ */
+static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D vectors as half-floats in three quadwords
+ */
+static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print_V( VmathVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 4-D vector
+ */
+static inline vec_float4 vmathV4Get128_V( VmathVector4 vec );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX_V( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY_V( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW_V( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX_V( VmathVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY_V( VmathVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ_V( VmathVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW_V( VmathVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
+
+/*
+ * Construct the x axis as a 4-D vector (takes no arguments)
+ */
+static inline VmathVector4 vmathV4MakeXAxis_V( void );
+
+/*
+ * Construct the y axis as a 4-D vector (takes no arguments)
+ */
+static inline VmathVector4 vmathV4MakeYAxis_V( void );
+
+/*
+ * Construct the z axis as a 4-D vector (takes no arguments)
+ */
+static inline VmathVector4 vmathV4MakeZAxis_V( void );
+
+/*
+ * Construct the w axis as a 4-D vector (takes no arguments)
+ */
+static inline VmathVector4 vmathV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem_V( VmathVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem_V( VmathVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum_V( VmathVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr_V( VmathVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length_V( VmathVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
+
+/*
+ * Store four 4-D vectors as half-floats in two quadwords
+ */
+static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print_V( VmathVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D point
+ */
+static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX_V( VmathPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY_V( VmathPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ_V( VmathPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum_V( VmathPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D points in three quadwords
+ */
+static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D points as half-floats in three quadwords
+ */
+static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print_V( VmathPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
+
+/*
+ * Convert a 3x3 rotation matrix to a unit-length quaternion
+ */
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a quaternion
+ */
+static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a quaternion
+ */
+static inline vec_float4 vmathQGet128_V( VmathQuat quat );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX_V( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY_V( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ_V( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW_V( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX_V( VmathQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY_V( VmathQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ_V( VmathQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW_V( VmathQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem_V( VmathQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathQuat vmathQNeg_V( VmathQuat quat );
+
+/*
+ * Construct an identity quaternion (takes no arguments)
+ */
+static inline VmathQuat vmathQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathQuat vmathQMakeRotationX_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathQuat vmathQMakeRotationY_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathQuat vmathQMakeRotationZ_V( float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathQuat vmathQConj_V( VmathQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion
+ */
+static inline float vmathQNorm_V( VmathQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength_V( VmathQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint_V( VmathQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints_V( VmathQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (takes no arguments)
+ */
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant_V( VmathMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print_V( VmathMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix (takes no arguments; result is returned by value)
+ */
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant_V( VmathMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print_V( VmathMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix
+ */
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
+
+/*
+ * Set translation component
+ */
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix (takes no arguments; result is returned by value)
+ */
+static inline VmathTransform3 vmathT3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a rotation matrix from a unit-length quaternion
+ */
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print_V( VmathTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_aos.h"
+#include "vec_aos_v.h"
+#include "quat_aos_v.h"
+#include "mat_aos_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa.h
index ad1e841d3..6aa9b6ae7 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa.h
@@ -1,2012 +1,2012 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_C_SPU_H
-#define _VECTORMATH_SOA_C_SPU_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "vectormath_aos.h"
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_SOA_C_TYPES_H
-#define _VECTORMATH_SOA_C_TYPES_H
-
-/* A set of four 3-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaVector3;
-
-/* A set of four 4-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector4
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaVector4;
-
-/* A set of four 3-D points in structure-of-arrays format
- */
-typedef struct _VmathSoaPoint3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaPoint3;
-
-/* A set of four quaternions in structure-of-arrays format
- */
-typedef struct _VmathSoaQuat
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaQuat;
-
-/* A set of four 3x3 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-} VmathSoaMatrix3;
-
-/* A set of four 4x4 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix4
-{
-    VmathSoaVector4 col0;
-    VmathSoaVector4 col1;
-    VmathSoaVector4 col2;
-    VmathSoaVector4 col3;
-} VmathSoaMatrix4;
-
-/* A set of four 3x4 transformation matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaTransform3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-    VmathSoaVector3 col3;
-} VmathSoaTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D vector
- */
-static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec );
-
-/*
- * Insert four AoS 3-D vectors
- */
-static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 );
-
-/*
- * Extract four AoS 3-D vectors
- */
-static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D vector in three quadwords
- */
-static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D vectors as half-floats
- */
-static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Print( const VmathSoaVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 4-D vector
- */
-static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec );
-
-/*
- * Insert four AoS 4-D vectors
- */
-static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 );
-
-/*
- * Extract four AoS 4-D vectors
- */
-static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 );
-
-/*
- * Store four slots of an SoA 4-D vector as half-floats
- */
-static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Print( const VmathSoaVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D point
- */
-static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Insert four AoS 3-D points
- */
-static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 );
-
-/*
- * Extract four AoS 3-D points
- */
-static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D point in three quadwords
- */
-static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *pnt, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D points as half-floats
- */
-static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS quaternion
- */
-static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat );
-
-/*
- * Insert four AoS quaternions
- */
-static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 );
-
-/*
- * Extract four AoS quaternions
- */
-static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrint( const VmathSoaQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x3 matrix
- */
-static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Insert four AoS 3x3 matrices
- */
-static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 );
-
-/*
- * Extract four AoS 3x3 matrices
- */
-static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0, const VmathSoaVector4 *col1, const VmathSoaVector4 *col2, const VmathSoaVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 4x4 matrix
- */
-static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Insert four AoS 4x4 matrices
- */
-static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 );
-
-/*
- * Extract four AoS 4x4 matrices
- */
-static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2, const VmathSoaVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x4 transformation matrix
- */
-static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Insert four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 );
-
-/*
- * Extract four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_soa.h"
-#include "quat_soa.h"
-#include "mat_soa.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_SPU_H
+#define _VECTORMATH_SOA_C_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include "vectormath_aos.h"
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_SOA_C_TYPES_H
+#define _VECTORMATH_SOA_C_TYPES_H
+
+/* A set of four 3-D vectors in structure-of-arrays format
+ * (one vec_float4 per component; one slot per vector) */
+typedef struct _VmathSoaVector3
+{
+    vec_float4 x; /* x components of the four vectors */
+    vec_float4 y; /* y components of the four vectors */
+    vec_float4 z; /* z components of the four vectors */
+} VmathSoaVector3;
+
+/* A set of four 4-D vectors in structure-of-arrays format
+ * (one vec_float4 per component; one slot per vector) */
+typedef struct _VmathSoaVector4
+{
+    vec_float4 x; /* x components of the four vectors */
+    vec_float4 y; /* y components of the four vectors */
+    vec_float4 z; /* z components of the four vectors */
+    vec_float4 w; /* w components of the four vectors */
+} VmathSoaVector4;
+
+/* A set of four 3-D points in structure-of-arrays format
+ * (same member layout as VmathSoaVector3, but a distinct type) */
+typedef struct _VmathSoaPoint3
+{
+    vec_float4 x; /* x coordinates of the four points */
+    vec_float4 y; /* y coordinates of the four points */
+    vec_float4 z; /* z coordinates of the four points */
+} VmathSoaPoint3;
+
+/* A set of four quaternions in structure-of-arrays format
+ * (one vec_float4 per component; one slot per quaternion) */
+typedef struct _VmathSoaQuat
+{
+    vec_float4 x; /* x components of the four quaternions */
+    vec_float4 y; /* y components of the four quaternions */
+    vec_float4 z; /* z components of the four quaternions */
+    vec_float4 w; /* w components of the four quaternions */
+} VmathSoaQuat;
+
+/* A set of four 3x3 matrices in structure-of-arrays format
+ * (stored as three SoA 3-D column vectors) */
+typedef struct _VmathSoaMatrix3
+{
+    VmathSoaVector3 col0; /* column 0 of the four matrices */
+    VmathSoaVector3 col1; /* column 1 of the four matrices */
+    VmathSoaVector3 col2; /* column 2 of the four matrices */
+} VmathSoaMatrix3;
+
+/* A set of four 4x4 matrices in structure-of-arrays format
+ * (stored as four SoA 4-D column vectors) */
+typedef struct _VmathSoaMatrix4
+{
+    VmathSoaVector4 col0; /* column 0 of the four matrices */
+    VmathSoaVector4 col1; /* column 1 of the four matrices */
+    VmathSoaVector4 col2; /* column 2 of the four matrices */
+    VmathSoaVector4 col3; /* column 3 of the four matrices */
+} VmathSoaMatrix4;
+
+/* A set of four 3x4 transformation matrices in structure-of-arrays format
+ * (stored as four SoA 3-D column vectors) */
+typedef struct _VmathSoaTransform3
+{
+    VmathSoaVector3 col0; /* column 0 of the four transforms */
+    VmathSoaVector3 col1; /* column 1 of the four transforms */
+    VmathSoaVector3 col2; /* column 2 of the four transforms */
+    VmathSoaVector3 col3; /* column 3; presumably the translation - TODO confirm in vmathSoaT3GetTranslation */
+} VmathSoaTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D vector
+ */
+static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Insert four AoS 3-D vectors
+ */
+static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 );
+
+/*
+ * Extract four AoS 3-D vectors
+ */
+static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D vector in three quadwords
+ */
+static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D vectors as half-floats
+ */
+static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Print( const VmathSoaVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4-D vector
+ */
+static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Insert four AoS 4-D vectors
+ */
+static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 );
+
+/*
+ * Extract four AoS 4-D vectors
+ */
+static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 );
+
+/*
+ * Store four slots of an SoA 4-D vector as half-floats
+ */
+static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Print( const VmathSoaVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D point
+ */
+static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Insert four AoS 3-D points
+ */
+static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 );
+
+/*
+ * Extract four AoS 3-D points
+ */
+static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point, producing a 3-D vector
+ */
+static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point, producing a 3-D point
+ */
+static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D point in three quadwords
+ */
+static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *pnt, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D points as half-floats
+ */
+static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS quaternion
+ */
+static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat );
+
+/*
+ * Insert four AoS quaternions
+ */
+static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 );
+
+/*
+ * Extract four AoS quaternions
+ */
+static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion (NOTE(review): confirm whether this is the squared length; vmathSoaQLength is declared separately)
+ */
+static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation between unit-length quaternions
+ */
+static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrint( const VmathSoaQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x3 matrix
+ */
+static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Insert four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 );
+
+/*
+ * Extract four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes by the angles in radiansXYZ
+ */
+static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0, const VmathSoaVector4 *col1, const VmathSoaVector4 *col2, const VmathSoaVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4x4 matrix
+ */
+static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Insert four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 );
+
+/*
+ * Extract four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2, const VmathSoaVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x4 transformation matrix
+ */
+static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Insert four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 );
+
+/*
+ * Extract four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix representing the rotation of a unit-length quaternion
+ */
+static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_soa.h"
+#include "quat_soa.h"
+#include "mat_soa.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa_v.h
index 17065dd5e..85ebed3ac 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa_v.h
@@ -1,1978 +1,1978 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_C_V_SPU_H
-#define _VECTORMATH_SOA_C_V_SPU_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "vectormath_aos_v.h"
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_SOA_C_TYPES_H
-#define _VECTORMATH_SOA_C_TYPES_H
-
-/* A set of four 3-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaVector3;
-
-/* A set of four 4-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector4
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaVector4;
-
-/* A set of four 3-D points in structure-of-arrays format
- */
-typedef struct _VmathSoaPoint3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaPoint3;
-
-/* A set of four quaternions in structure-of-arrays format
- */
-typedef struct _VmathSoaQuat
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaQuat;
-
-/* A set of four 3x3 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-} VmathSoaMatrix3;
-
-/* A set of four 4x4 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix4
-{
-    VmathSoaVector4 col0;
-    VmathSoaVector4 col1;
-    VmathSoaVector4 col2;
-    VmathSoaVector4 col3;
-} VmathSoaMatrix4;
-
-/* A set of four 3x4 transformation matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaTransform3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-    VmathSoaVector3 col3;
-} VmathSoaTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec );
-
-/*
- * Insert four AoS 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 );
-
-/*
- * Extract four AoS 3-D vectors
- */
-static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D vector in three quadwords
- */
-static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D vectors as half-floats
- */
-static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Print_V( VmathSoaVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec );
-
-/*
- * Insert four AoS 4-D vectors
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 );
-
-/*
- * Extract four AoS 4-D vectors
- */
-static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 );
-
-/*
- * Store four slots of an SoA 4-D vector as half-floats
- */
-static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Print_V( VmathSoaVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt );
-
-/*
- * Insert four AoS 3-D points
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 );
-
-/*
- * Extract four AoS 3-D points
- */
-static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D point in three quadwords
- */
-static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 pnt, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D points as half-floats
- */
-static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat );
-
-/*
- * Insert four AoS quaternions
- */
-static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 );
-
-/*
- * Extract four AoS quaternions
- */
-static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat unitQuat, VmathSoaVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrint_V( VmathSoaQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat );
-
-/*
- * Insert four AoS 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 );
-
-/*
- * Extract four AoS 3x3 matrices
- */
-static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 col0, VmathSoaVector4 col1, VmathSoaVector4 col2, VmathSoaVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat );
-
-/*
- * Insert four AoS 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 );
-
-/*
- * Extract four AoS 4x4 matrices
- */
-static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2, VmathSoaVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x4 transformation matrix
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm );
-
-/*
- * Insert four AoS 3x4 transformation matrices
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 );
-
-/*
- * Extract four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_soa.h"
-#include "vec_soa_v.h"
-#include "quat_soa_v.h"
-#include "mat_soa_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_V_SPU_H
+#define _VECTORMATH_SOA_C_V_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include "vectormath_aos_v.h"
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_SOA_C_TYPES_H
+#define _VECTORMATH_SOA_C_TYPES_H
+
+/* A set of four 3-D vectors in structure-of-arrays format:
+ * one vec_float4 member per component (x, y, z) rather than one per vector. */
+typedef struct _VmathSoaVector3
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+} VmathSoaVector3;
+
+/* A set of four 4-D vectors in structure-of-arrays format:
+ * one vec_float4 member per component (x, y, z, w) rather than one per vector. */
+typedef struct _VmathSoaVector4
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+    vec_float4 w;
+} VmathSoaVector4;
+
+/* A set of four 3-D points in structure-of-arrays format:
+ * same member layout as VmathSoaVector3, but declared as a distinct type. */
+typedef struct _VmathSoaPoint3
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+} VmathSoaPoint3;
+
+/* A set of four quaternions in structure-of-arrays format:
+ * one vec_float4 member per component (x, y, z, w) rather than one per quaternion. */
+typedef struct _VmathSoaQuat
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+    vec_float4 w;
+} VmathSoaQuat;
+
+/* A set of four 3x3 matrices in structure-of-arrays format:
+ * held as three columns (col0..col2), each column a VmathSoaVector3. */
+typedef struct _VmathSoaMatrix3
+{
+    VmathSoaVector3 col0;
+    VmathSoaVector3 col1;
+    VmathSoaVector3 col2;
+} VmathSoaMatrix3;
+
+/* A set of four 4x4 matrices in structure-of-arrays format:
+ * held as four columns (col0..col3), each column a VmathSoaVector4. */
+typedef struct _VmathSoaMatrix4
+{
+    VmathSoaVector4 col0;
+    VmathSoaVector4 col1;
+    VmathSoaVector4 col2;
+    VmathSoaVector4 col3;
+} VmathSoaMatrix4;
+
+/* A set of four 3x4 transformation matrices in structure-of-arrays format:
+ * held as four columns (col0..col3), each column a VmathSoaVector3. */
+typedef struct _VmathSoaTransform3
+{
+    VmathSoaVector3 col0;
+    VmathSoaVector3 col1;
+    VmathSoaVector3 col2;
+    VmathSoaVector3 col3;
+} VmathSoaTransform3;
+
+#endif
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec );
+
+/*
+ * Insert four AoS 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 );
+
+/*
+ * Extract four AoS 3-D vectors
+ */
+static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec );
+
+/*
+ * Construct x axis
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( );
+
+/*
+ * Construct y axis
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( );
+
+/*
+ * Construct z axis
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D vector in three quadwords
+ */
+static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D vectors as half-floats
+ */
+static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Print_V( VmathSoaVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec );
+
+/*
+ * Insert four AoS 4-D vectors
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 );
+
+/*
+ * Extract four AoS 4-D vectors
+ */
+static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec );
+
+/*
+ * Construct the x axis as an SoA 4-D vector (takes no arguments)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( void );
+
+/*
+ * Construct the y axis as an SoA 4-D vector (takes no arguments)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( void );
+
+/*
+ * Construct the z axis as an SoA 4-D vector (takes no arguments)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( void );
+
+/*
+ * Construct the w axis as an SoA 4-D vector (takes no arguments)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 );
+
+/*
+ * Store four slots of an SoA 4-D vector as half-floats
+ */
+static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Print_V( VmathSoaVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt );
+
+/*
+ * Insert four AoS 3-D points
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 );
+
+/*
+ * Extract four AoS 3-D points
+ */
+static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D point in three quadwords
+ */
+static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 pnt, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D points as half-floats
+ */
+static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat );
+
+/*
+ * Insert four AoS quaternions
+ */
+static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 );
+
+/*
+ * Extract four AoS quaternions
+ */
+static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat );
+
+/*
+ * Construct an identity quaternion in SoA form (takes no arguments)
+ */
+static inline VmathSoaQuat vmathSoaQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat unitQuat, VmathSoaVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
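+ * Compute the norm of a quaternion (NOTE(review): distinct from vmathSoaQLength_V; presumably the squared length - confirm against the definition)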
+ */
+static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrint_V( VmathSoaQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat );
+
+/*
+ * Insert four AoS 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 );
+
+/*
+ * Extract four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix in SoA form (takes no arguments)
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 col0, VmathSoaVector4 col1, VmathSoaVector4 col2, VmathSoaVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat );
+
+/*
+ * Insert four AoS 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 );
+
+/*
+ * Extract four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2, VmathSoaVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix with all elements set to the same scalar value
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x4 transformation matrix
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm );
+
+/*
+ * Insert four AoS 3x4 transformation matrices
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 );
+
+/*
+ * Extract four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 3x4 rotation transformation matrix from a unit-length quaternion
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_soa.h"
+#include "vec_soa_v.h"
+#include "quat_soa_v.h"
+#include "mat_soa_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/boolInVec.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/boolInVec.h
index ac535843b..93a3ad29d 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/boolInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/boolInVec.h
@@ -1,246 +1,246 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _BOOLINVEC_H
-#define _BOOLINVEC_H
-
-#include <spu_intrinsics.h>
-
-namespace Vectormath {
-
-class floatInVec;
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec class
-//
-
-class boolInVec
-{
-    private:
-        vec_uint4 mData;
-
-        inline boolInVec(vec_uint4 vec);
-    public:
-        inline boolInVec() {}
-
-        // matches standard type conversions
-        //
-        inline boolInVec(floatInVec vec);
-
-        // explicit cast from bool
-        //
-        explicit inline boolInVec(bool scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to bool
-        // 
-        inline bool getAsBool() const;
-#else
-        // implicit cast to bool
-        // 
-        inline operator bool() const;
-#endif
-    
-        // get vector data
-        // bool value is in the 0 word slot of vector as 0 (false) or -1 (true)
-        //
-        inline vec_uint4 get128() const;
-
-        // operators
-        //
-        inline const boolInVec operator ! () const;
-        inline boolInVec& operator = (boolInVec vec);
-        inline boolInVec& operator &= (boolInVec vec);
-        inline boolInVec& operator ^= (boolInVec vec);
-        inline boolInVec& operator |= (boolInVec vec);
-
-        // friend functions
-        //
-        friend inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec functions
-//
-
-// operators
-//
-inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec implementation
-//
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-
-inline
-boolInVec::boolInVec(vec_uint4 vec)
-{
-    mData = vec;
-}
-
-inline
-boolInVec::boolInVec(floatInVec vec)
-{
-    *this = (vec != floatInVec(0.0f));
-}
-
-inline
-boolInVec::boolInVec(bool scalar)
-{
-    mData = spu_promote((unsigned int)-scalar, 0);
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-bool
-boolInVec::getAsBool() const
-#else
-inline
-boolInVec::operator bool() const
-#endif
-{
-    return (bool)spu_extract(mData, 0);
-}
-
-inline
-vec_uint4
-boolInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const boolInVec
-boolInVec::operator ! () const
-{
-    return boolInVec(spu_nor(mData, mData));
-}
-
-inline
-boolInVec&
-boolInVec::operator = (boolInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator &= (boolInVec vec)
-{
-    *this = *this & vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator ^= (boolInVec vec)
-{
-    *this = *this ^ vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator |= (boolInVec vec)
-{
-    *this = *this | vec;
-    return *this;
-}
-
-inline
-const boolInVec
-operator == (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(spu_cmpeq(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (boolInVec vec0, boolInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-    
-inline
-const boolInVec
-operator & (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(spu_and(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator | (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(spu_or(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator ^ (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(spu_xor(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
-{
-    return boolInVec(spu_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
- 
-} // namespace Vectormath
-
-#endif // boolInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <spu_intrinsics.h>
+
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec
+{
+    private:
+        vec_uint4 mData;
+
+        inline boolInVec(vec_uint4 vec);
+    public:
+        inline boolInVec() {}
+
+        // implicit conversion from floatInVec: nonzero is true, matching the standard float-to-bool conversion
+        //
+        inline boolInVec(floatInVec vec);
+
+        // explicit cast from bool
+        //
+        explicit inline boolInVec(bool scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to bool
+        // 
+        inline bool getAsBool() const;
+#else
+        // implicit cast to bool
+        // 
+        inline operator bool() const;
+#endif
+    
+        // get vector data
+        // bool value is in the 0 word slot of vector as 0 (false) or -1 (true)
+        //
+        inline vec_uint4 get128() const;
+
+        // operators
+        //
+        inline const boolInVec operator ! () const;
+        inline boolInVec& operator = (boolInVec vec);
+        inline boolInVec& operator &= (boolInVec vec);
+        inline boolInVec& operator ^= (boolInVec vec);
+        inline boolInVec& operator |= (boolInVec vec);
+
+        // friend functions
+        //
+        friend inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec functions
+//
+
+// operators
+//
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+inline
+boolInVec::boolInVec(vec_uint4 vec)
+{
+    mData = vec;
+}
+
+inline
+boolInVec::boolInVec(floatInVec vec)
+{
+    *this = (vec != floatInVec(0.0f));
+}
+
+inline
+boolInVec::boolInVec(bool scalar)
+{
+    mData = spu_promote((unsigned int)-scalar, 0);
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+bool
+boolInVec::getAsBool() const
+#else
+inline
+boolInVec::operator bool() const
+#endif
+{
+    return (bool)spu_extract(mData, 0);
+}
+
+inline
+vec_uint4
+boolInVec::get128() const
+{
+    return mData;
+}
+
+inline
+const boolInVec
+boolInVec::operator ! () const
+{
+    return boolInVec(spu_nor(mData, mData));
+}
+
+inline
+boolInVec&
+boolInVec::operator = (boolInVec vec)
+{
+    mData = vec.mData;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator &= (boolInVec vec)
+{
+    *this = *this & vec;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator ^= (boolInVec vec)
+{
+    *this = *this ^ vec;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator |= (boolInVec vec)
+{
+    *this = *this | vec;
+    return *this;
+}
+
+inline
+const boolInVec
+operator == (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(spu_cmpeq(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator != (boolInVec vec0, boolInVec vec1)
+{
+    return !(vec0 == vec1);
+}
+    
+inline
+const boolInVec
+operator & (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(spu_and(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator | (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(spu_or(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator ^ (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(spu_xor(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
+{
+    return boolInVec(spu_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
+}
+ 
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/floatInVec.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/floatInVec.h
index 638f22f91..7521c0c40 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/floatInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/floatInVec.h
@@ -1,339 +1,339 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _FLOATINVEC_H
-#define _FLOATINVEC_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include <simdmath.h>
-#undef bool
-
-namespace Vectormath {
-
-class boolInVec;
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec class
-//
-
-class floatInVec
-{
-    private:
-        vec_float4 mData;
-
-        inline floatInVec(vec_float4 vec);
-    public:
-        inline floatInVec() {}
-
-        // matches standard type conversions
-        //
-        inline floatInVec(boolInVec vec);
-
-        // construct from a slot of vec_float4
-        //
-        inline floatInVec(vec_float4 vec, int slot);
-        
-        // explicit cast from float
-        //
-        explicit inline floatInVec(float scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to float
-        // 
-        inline float getAsFloat() const;
-#else
-        // implicit cast to float
-        //
-        inline operator float() const;
-#endif
-
-        // get vector data
-        // float value is in 0 word slot of vector
-        //
-        inline vec_float4 get128() const;
-
-        // operators
-        // 
-        inline const floatInVec operator ++ (int);
-        inline const floatInVec operator -- (int);
-        inline floatInVec& operator ++ ();
-        inline floatInVec& operator -- ();
-        inline const floatInVec operator - () const;
-        inline floatInVec& operator = (floatInVec vec);
-        inline floatInVec& operator *= (floatInVec vec);
-        inline floatInVec& operator /= (floatInVec vec);
-        inline floatInVec& operator += (floatInVec vec);
-        inline floatInVec& operator -= (floatInVec vec);
-
-        // friend functions
-        //
-        friend inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec functions
-//
-
-// operators
-// 
-inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec implementation
-//
-
-#include "boolInVec.h"
-
-namespace Vectormath {
-
-inline
-floatInVec::floatInVec(vec_float4 vec)
-{
-    mData = vec;
-}
-
-inline
-floatInVec::floatInVec(boolInVec vec)
-{
-    mData = spu_sel(spu_splats(0.0f), spu_splats(1.0f), vec.get128());
-}
-
-inline
-floatInVec::floatInVec(vec_float4 vec, int slot)
-{
-    mData = spu_promote(spu_extract(vec, slot), 0);
-}
-
-inline
-floatInVec::floatInVec(float scalar)
-{
-    mData = spu_promote(scalar, 0);
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-float
-floatInVec::getAsFloat() const
-#else
-inline
-floatInVec::operator float() const
-#endif
-{
-    return spu_extract(mData,0);
-}
-
-inline
-vec_float4
-floatInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const floatInVec
-floatInVec::operator ++ (int)
-{
-    vec_float4 olddata = mData;
-    operator ++();
-    return floatInVec(olddata);
-}
-
-inline
-const floatInVec
-floatInVec::operator -- (int)
-{
-    vec_float4 olddata = mData;
-    operator --();
-    return floatInVec(olddata);
-}
-
-inline
-floatInVec&
-floatInVec::operator ++ ()
-{
-    *this += floatInVec(1.0f);
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -- ()
-{
-    *this -= floatInVec(1.0f);
-    return *this;
-}
-
-inline
-const floatInVec
-floatInVec::operator - () const
-{
-    return floatInVec((vec_float4)spu_xor((vec_uint4)mData, spu_splats(0x80000000)));
-}
-
-inline
-floatInVec&
-floatInVec::operator = (floatInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator *= (floatInVec vec)
-{
-    *this = *this * vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator /= (floatInVec vec)
-{
-    *this = *this / vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator += (floatInVec vec)
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -= (floatInVec vec)
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline
-const floatInVec
-operator * (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(spu_mul(vec0.get128(), vec1.get128()));
-}
-
-inline
-const floatInVec
-operator / (floatInVec num, floatInVec den)
-{
-    return floatInVec(divf4(num.get128(), den.get128()));
-}
-
-inline
-const floatInVec
-operator + (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(spu_add(vec0.get128(), vec1.get128()));
-}
-
-inline
-const floatInVec
-operator - (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(spu_sub(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator < (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(spu_cmpgt(vec1.get128(), vec0.get128()));
-}
-
-inline
-const boolInVec
-operator <= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 > vec1);
-}
-
-inline
-const boolInVec
-operator > (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(spu_cmpgt(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator >= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 < vec1);
-}
-
-inline
-const boolInVec
-operator == (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(spu_cmpeq(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-    
-inline
-const floatInVec
-select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
-{
-    return floatInVec(spu_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
-
-} // namespace Vectormath
-
-#endif // floatInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include <simdmath.h>
+#undef bool
+
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+class floatInVec  // a single float value carried inside a vec_float4
+{
+    private:
+        vec_float4 mData;  // the scalar is read from word slot 0 (see get128 below)
+
+        inline floatInVec(vec_float4 vec);  // private: adopts vec as-is, used by the friend operators
+    public:
+        inline floatInVec() {}  // empty body: mData is not initialized here
+
+        // matches standard type conversions
+        // (the implementation selects 1.0f where the mask is set, 0.0f elsewhere)
+        inline floatInVec(boolInVec vec);
+
+        // construct from a slot of vec_float4
+        // (the chosen element is extracted and placed in slot 0)
+        inline floatInVec(vec_float4 vec, int slot);
+        
+        // explicit cast from float
+        // (marked explicit, so a float never converts to floatInVec implicitly)
+        explicit inline floatInVec(float scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to float
+        // (declared instead of operator float when this macro is defined)
+        inline float getAsFloat() const;
+#else
+        // implicit cast to float
+        // (returns the slot-0 element)
+        inline operator float() const;
+#endif
+
+        // get vector data
+        // float value is in word slot 0 of the vector
+        //
+        inline vec_float4 get128() const;
+
+        // operators
+        // postfix ++/-- return the previous value; the assignment forms return *this
+        inline const floatInVec operator ++ (int);
+        inline const floatInVec operator -- (int);
+        inline floatInVec& operator ++ ();
+        inline floatInVec& operator -- ();
+        inline const floatInVec operator - () const;
+        inline floatInVec& operator = (floatInVec vec);
+        inline floatInVec& operator *= (floatInVec vec);
+        inline floatInVec& operator /= (floatInVec vec);
+        inline floatInVec& operator += (floatInVec vec);
+        inline floatInVec& operator -= (floatInVec vec);
+
+        // friend functions
+        // (free functions granted access to the private vec_float4 constructor)
+        friend inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec functions
+//
+
+// operators
+// 
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);  // arithmetic operators return a floatInVec
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);  // comparison operators return a boolInVec
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+// (the definition applies spu_sel to the raw 128-bit values)
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+inline
+floatInVec::floatInVec(vec_float4 vec)  // construct directly from a full vector
+{
+    mData = vec;  // stored unchanged; no slot is rearranged
+}
+
+inline
+floatInVec::floatInVec(boolInVec vec)  // convert a boolInVec to a float value
+{
+    mData = spu_sel(spu_splats(0.0f), spu_splats(1.0f), vec.get128());  // 1.0f where the mask bits are set, 0.0f where they are clear
+}
+
+inline
+floatInVec::floatInVec(vec_float4 vec, int slot)  // construct from element `slot` of vec
+{
+    mData = spu_promote(spu_extract(vec, slot), 0);  // pull out the element, then place it in slot 0; NOTE(review): other slots presumably unspecified - confirm
+}
+
+inline
+floatInVec::floatInVec(float scalar)  // construct from a plain float (declared explicit in the class)
+{
+    mData = spu_promote(scalar, 0);  // scalar goes to slot 0; NOTE(review): other slots presumably unspecified - confirm
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+float
+floatInVec::getAsFloat() const  // named accessor, compiled when the macro is defined
+#else
+inline
+floatInVec::operator float() const  // conversion operator, compiled otherwise
+#endif
+{  // one body serves whichever signature the preprocessor selected above
+    return spu_extract(mData,0);  // read element 0 of the vector
+}
+
+inline
+vec_float4
+floatInVec::get128() const  // accessor for the underlying vec_float4
+{
+    return mData;  // returned by value; the object itself is not modified
+}
+
+inline
+const floatInVec
+floatInVec::operator ++ (int)  // postfix increment
+{
+    vec_float4 olddata = mData;  // snapshot taken before the increment
+    operator ++();  // reuse the prefix form to update *this
+    return floatInVec(olddata);  // hand back the value from before the increment
+}
+
+inline
+const floatInVec
+floatInVec::operator -- (int)  // postfix decrement
+{
+    vec_float4 olddata = mData;  // snapshot taken before the decrement
+    operator --();  // reuse the prefix form to update *this
+    return floatInVec(olddata);  // hand back the value from before the decrement
+}
+
+inline
+floatInVec&
+floatInVec::operator ++ ()  // prefix increment
+{
+    *this += floatInVec(1.0f);  // explicit construction is required because the float constructor is explicit
+    return *this;  // returns the updated object
+}
+
+inline
+floatInVec&
+floatInVec::operator -- ()  // prefix decrement
+{
+    *this -= floatInVec(1.0f);  // explicit construction is required because the float constructor is explicit
+    return *this;  // returns the updated object
+}
+
+inline
+const floatInVec
+floatInVec::operator - () const  // unary negation; *this is unchanged
+{
+    return floatInVec((vec_float4)spu_xor((vec_uint4)mData, spu_splats(0x80000000)));  // XOR toggles the top bit (the float sign bit) of every 32-bit word
+}
+
+inline
+floatInVec&
+floatInVec::operator = (floatInVec vec)  // assignment from a by-value argument
+{
+    mData = vec.mData;  // copy the whole vector, not only slot 0
+    return *this;  // reference return permits chained assignment
+}
+
+inline
+floatInVec&
+floatInVec::operator *= (floatInVec vec)  // compound multiply-assignment
+{
+    *this = *this * vec;  // implemented in terms of the binary operator *
+    return *this;  // returns the updated object
+}
+
+inline
+floatInVec&
+floatInVec::operator /= (floatInVec vec)  // compound divide-assignment
+{
+    *this = *this / vec;  // implemented in terms of the binary operator /
+    return *this;  // returns the updated object
+}
+
+inline
+floatInVec&
+floatInVec::operator += (floatInVec vec)  // compound add-assignment
+{
+    *this = *this + vec;  // implemented in terms of the binary operator +
+    return *this;  // returns the updated object
+}
+
+inline
+floatInVec&
+floatInVec::operator -= (floatInVec vec)  // compound subtract-assignment
+{
+    *this = *this - vec;  // implemented in terms of the binary operator -
+    return *this;  // returns the updated object
+}
+
+inline
+const floatInVec
+operator * (floatInVec vec0, floatInVec vec1)  // product of two floatInVec values
+{
+    return floatInVec(spu_mul(vec0.get128(), vec1.get128()));  // multiplies the raw vectors; uses the private vec_float4 constructor via friendship
+}
+
+inline
+const floatInVec
+operator / (floatInVec num, floatInVec den)  // quotient num / den (declared earlier with parameter names vec0, vec1)
+{
+    return floatInVec(divf4(num.get128(), den.get128()));  // division is delegated to divf4 (simdmath.h is included by this header)
+}
+
+inline
+const floatInVec
+operator + (floatInVec vec0, floatInVec vec1)  // sum of two floatInVec values
+{
+    return floatInVec(spu_add(vec0.get128(), vec1.get128()));  // adds the raw vectors
+}
+
+inline
+const floatInVec
+operator - (floatInVec vec0, floatInVec vec1)  // difference vec0 - vec1
+{
+    return floatInVec(spu_sub(vec0.get128(), vec1.get128()));  // subtracts the raw vectors
+}
+
+inline
+const boolInVec
+operator < (floatInVec vec0, floatInVec vec1)  // less-than comparison
+{
+    return boolInVec(spu_cmpgt(vec1.get128(), vec0.get128()));  // operands swapped: vec0 < vec1 is evaluated as vec1 > vec0
+}
+
+inline
+const boolInVec
+operator <= (floatInVec vec0, floatInVec vec1)  // less-than-or-equal comparison
+{
+    return !(vec0 > vec1);  // defined as the negation of operator >
+}
+
+inline
+const boolInVec
+operator > (floatInVec vec0, floatInVec vec1)  // greater-than comparison
+{
+    return boolInVec(spu_cmpgt(vec0.get128(), vec1.get128()));  // the compare result is wrapped directly as the boolInVec mask
+}
+
+inline
+const boolInVec
+operator >= (floatInVec vec0, floatInVec vec1)  // greater-than-or-equal comparison
+{
+    return !(vec0 < vec1);  // defined as the negation of operator <
+}
+
+inline
+const boolInVec
+operator == (floatInVec vec0, floatInVec vec1)  // equality comparison
+{
+    return boolInVec(spu_cmpeq(vec0.get128(), vec1.get128()));  // the compare result is wrapped directly as the boolInVec mask
+}
+
+inline
+const boolInVec
+operator != (floatInVec vec0, floatInVec vec1)  // inequality comparison
+{
+    return !(vec0 == vec1);  // defined as the negation of operator ==
+}
+    
+inline
+const floatInVec
+select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)  // false selects vec0, true selects vec1
+{
+    return floatInVec(spu_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));  // spu_sel takes vec1's bits where the mask bit is set, vec0's elsewhere
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h
index d4f955c23..a2fd611ec 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h
@@ -1,2027 +1,2027 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_CPP_H
-#define _VECTORMATH_MAT_AOS_CPP_H
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
-#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
-#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
-#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
-#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( Quat unitQuat )
-{
-    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000);
-    vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0);
-    xyzw_2 = spu_add( unitQuat.get128(), unitQuat.get128() );
-    wwww = spu_shuffle( unitQuat.get128(), unitQuat.get128(), shuffle_wwww );
-    yzxw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_YZXW );
-    zxyw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_ZXYW );
-    yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW );
-    zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW );
-    tmp0 = spu_mul( yzxw_2, wwww );
-    tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) );
-    tmp2 = spu_mul( yzxw, xyzw_2 );
-    tmp0 = spu_madd( zxyw, xyzw_2, tmp0 );
-    tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 );
-    tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 );
-    tmp3 = spu_sel( tmp0, tmp1, select_x );
-    tmp4 = spu_sel( tmp1, tmp2, select_x );
-    tmp5 = spu_sel( tmp2, tmp0, select_x );
-    mCol0 = Vector3( spu_sel( tmp3, tmp2, select_z ) );
-    mCol1 = Vector3( spu_sel( tmp4, tmp0, select_z ) );
-    mCol2 = Vector3( spu_sel( tmp5, tmp1, select_z ) );
-}
-
-inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline float Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, res0, res1, res2;
-    tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
-    res0 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
-    res1 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
-    res2 = spu_shuffle( tmp1, mat.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
-    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
-    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
-    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
-    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
-    invdet = recipf4( dot );
-    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
-    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
-    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
-    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
-    inv0 = spu_mul( inv0, invdet );
-    inv1 = spu_mul( inv1, invdet );
-    inv2 = spu_mul( inv2, invdet );
-    return Matrix3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 )
-    );
-}
-
-inline float determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( float scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    res = spu_mul( mCol0.get128(), xxxx );
-    res = spu_madd( mCol1.get128(), yyyy, res );
-    res = spu_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ.get128();
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    return Matrix3(
-        Vector3( spu_mul( Z0, Y0 ) ),
-        Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
-        Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    axis = unitVec.get128();
-    sincosf4( spu_splats( radians ), &s, &c );
-    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
-    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
-    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    axisS = spu_mul( axis, s );
-    negAxisS = negatef4( axisS );
-    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
-    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
-    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
-    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
-    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
-    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
-    return Matrix3(
-        Vector3( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
-        Vector3( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
-        Vector3( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( Quat unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    return Matrix3(
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( float scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( mat.getCol3(), 1.0f );
-}
-
-inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline float Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat.getCol1().get128(), mat.getCol3().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( mat.getCol1().get128(), mat.getCol3().get128(), _VECTORMATH_SHUF_ZCWD );
-    res0 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    res1 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    res2 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    res3 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4( res3 )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 in0, in1, in2, in3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    vec_float4 cof0, cof1, cof2, cof3;
-    vec_float4 t0, t1, t2, t3;
-    vec_float4 t01, t02, t03, t12, t23;
-    vec_float4 t1r, t2r;
-    vec_float4 t01r, t02r, t03r, t12r, t23r;
-    vec_float4 t1r3, t1r3r;
-    vec_float4 det, det1, det2, det3, invdet;
-    in0 = mat.getCol0().get128();
-    in1 = mat.getCol1().get128();
-    in2 = mat.getCol2().get128();
-    in3 = mat.getCol3().get128();
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
-    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
-    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
-    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
-    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
-    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
-    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
-    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
-    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
-    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
-    cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
-    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
-    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
-    cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
-    cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
-
-    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
-    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
-    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
-    cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
-    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
-    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
-    cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
-    cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
-    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
-    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
-    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
-    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
-    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
-    cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
-    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
-    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
-    cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
-    cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
-    t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
-    t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
-    cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
-    cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
-    t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
-    cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
-    cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
-    t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
-    t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
-    cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
-    cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
-    t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
-    cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
-    cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
-    t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
-    t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* E0 AK MG IC */
-    cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
-    cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
-    t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
-    cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
-    cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
-    /* Compute the determinant of the matrix
-     *
-     * det = sum_across(t0 * cof0);
-     *
-     * We perform a sum across the entire vector so that
-     * we don't have to splat the result when multiplying the
-     * cofactors by the inverse of the determinant.
-     */
-    det  = spu_mul(t0, cof0);
-    det1 = spu_rlqwbyte(det, 4);
-    det2 = spu_rlqwbyte(det, 8);
-    det3 = spu_rlqwbyte(det, 12);
-    det  = spu_add(det, det1);
-    det2 = spu_add(det2, det3);
-    det  = spu_add(det, det2);
-    /* Compute the reciprocal of the determinant.
-     */
-    invdet = recipf4(det);
-    /* Multiply the cofactors by the reciprocal of the determinant.
-     */
-    return Matrix4(
-        Vector4( spu_mul(cof0, invdet) ),
-        Vector4( spu_mul(cof1, invdet) ),
-        Vector4( spu_mul(cof2, invdet) ),
-        Vector4( spu_mul(cof3, invdet) )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline float determinant( const Matrix4 & mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 in0, in1, in2, in3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    vec_float4 cof0;
-    vec_float4 t0, t1, t2, t3;
-    vec_float4 t12, t23;
-    vec_float4 t1r, t2r;
-    vec_float4 t12r, t23r;
-    vec_float4 t1r3, t1r3r;
-    in0 = mat.getCol0().get128();
-    in1 = mat.getCol1().get128();
-    in2 = mat.getCol2().get128();
-    in3 = mat.getCol3().get128();
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
-    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
-    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
-    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
-    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
-    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
-    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
-    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
-    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
-    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
-    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
-    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
-
-    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
-    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
-    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
-    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
-    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
-    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
-    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
-    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
-    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
-    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
-    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
-    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
-    return spu_extract( _vmathVfDot4(t0,cof0), 0 );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( float scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( Vector4 vec ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz, wwww;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    wwww = spu_shuffle( vec.get128(), vec.get128(), shuffle_wwww );
-    tmp0 = spu_mul( mCol0.get128(), xxxx );
-    tmp1 = spu_mul( mCol1.get128(), yyyy );
-    tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = spu_madd( mCol3.get128(), wwww, tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    return Vector4( res );
-}
-
-inline const Vector4 Matrix4::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    res = spu_mul( mCol0.get128(), xxxx );
-    res = spu_madd( mCol1.get128(), yyyy, res );
-    res = spu_madd( mCol2.get128(), zzzz, res );
-    return Vector4( res );
-}
-
-inline const Vector4 Matrix4::operator *( Point3 pnt ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_zzzz );
-    tmp0 = spu_mul( mCol0.get128(), xxxx );
-    tmp1 = spu_mul( mCol1.get128(), yyyy );
-    tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = spu_add( mCol3.get128(), tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    return Vector4( res );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4::yAxis( ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ.get128();
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    return Matrix4(
-        Vector4( spu_mul( Z0, Y0 ) ),
-        Vector4( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
-        Vector4( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    axis = unitVec.get128();
-    sincosf4( spu_splats( radians ), &s, &c );
-    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
-    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
-    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    axisS = spu_mul( axis, s );
-    negAxisS = negatef4( axisS );
-    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
-    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
-    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
-    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
-    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
-    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
-    zeroW = (vec_float4)spu_maskb(0x000f);
-    axis = spu_andc( axis, zeroW );
-    return Matrix4(
-        Vector4( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
-        Vector4( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
-        Vector4( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( Quat unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    return Matrix4(
-        Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, 1.0f );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( Vector3 translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, 1.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    vec_float4 zero, col0, col1, col2, col3;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = spu_splats(0.0f);
-    col0 = zero;
-    col1 = zero;
-    col2 = zero;
-    col3 = zero;
-    col0 = spu_insert( f / aspect, col0, 0 );
-    col1 = spu_insert( f, col1, 1 );
-    col2 = spu_insert( ( zNear + zFar ) * rangeInv, col2, 2 );
-    col2 = spu_insert( -1.0f, col2, 3 );
-    col3 = spu_insert( zNear * zFar * rangeInv * 2.0f, col3, 2 );
-    return Matrix4(
-        Vector4( col0 ),
-        Vector4( col1 ),
-        Vector4( col2 ),
-        Vector4( col3 )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff;
-    vec_float4 diagonal, column, near2;
-    vec_float4 zero = spu_splats(0.0f);
-    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
-    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
-    diff = spu_sub( rtn, lbf );
-    sum  = spu_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = spu_splats( zNear );
-    near2 = spu_add( near2, near2 );
-    diagonal = spu_mul( near2, inv_diff );
-    column = spu_mul( sum, inv_diff );
-    return Matrix4(
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector4( spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) ) ),
-        Vector4( spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) ) )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff, neg_inv_diff;
-    vec_float4 diagonal, column;
-    vec_float4 zero = spu_splats(0.0f);
-    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
-    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
-    diff = spu_sub( rtn, lbf );
-    sum  = spu_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = spu_add( inv_diff, inv_diff );
-    column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) );
-    return Matrix4(
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) ) ),
-        Vector4( spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) ) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-    print( mat.getRow( 3 ) );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3 & Transform3::setCol0( Vector3 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( Vector3 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( Vector3 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( Vector3 _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, Vector3 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, Vector4 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline float Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
-    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
-    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
-    dot = spu_shuffle( dot, dot, shuffle_xxxx );
-    invdet = recipf4( dot );
-    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
-    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
-    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
-    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
-    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
-    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
-    inv3 = spu_mul( inv0, xxxx );
-    inv3 = spu_madd( inv1, yyyy, inv3 );
-    inv3 = spu_madd( inv2, zzzz, inv3 );
-    inv0 = spu_mul( inv0, invdet );
-    inv1 = spu_mul( inv1, invdet );
-    inv2 = spu_mul( inv2, invdet );
-    inv3 = spu_mul( inv3, invdet );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp0 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    inv0 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
-    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
-    inv1 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp1, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
-    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
-    inv3 = spu_mul( inv0, xxxx );
-    inv3 = spu_madd( inv1, yyyy, inv3 );
-    inv3 = spu_madd( inv2, zzzz, inv3 );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    res = spu_mul( mCol0.get128(), xxxx );
-    res = spu_madd( mCol1.get128(), yyyy, res );
-    res = spu_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Point3 Transform3::operator *( Point3 pnt ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_zzzz );
-    tmp0 = spu_mul( mCol0.get128(), xxxx );
-    tmp1 = spu_mul( mCol1.get128(), yyyy );
-    tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = spu_add( mCol3.get128(), tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    return Point3( res );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ.get128();
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    return Transform3(
-        Vector3( spu_mul( Z0, Y0 ) ),
-        Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
-        Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::rotation( Quat unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    return Transform3(
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( Vector3 translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    print( tfrm.getRow( 0 ) );
-    print( tfrm.getRow( 1 ) );
-    print( tfrm.getRow( 2 ) );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    vec_float4 res;
-    vec_float4 col0, col1, col2;
-    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
-    vec_float4 radicand, invSqrt, scale;
-    vec_float4 res0, res1, res2, res3;
-    vec_float4 xx, yy, zz;
-    vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 );
-    vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 );
-    vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 );
-    vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f );
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f);
-
-    col0 = tfrm.getCol0().get128();
-    col1 = tfrm.getCol1().get128();
-    col2 = tfrm.getCol2().get128();
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = spu_sel( col0, col1, select_y );
-    xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
-    yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
-    zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
-
-    diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = spu_sel( col0, col1, select_z );
-    zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
-    yz_zx_xy = spu_sel( col0, col1, select_x );
-    yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
-
-    sum = spu_add( zy_xz_yx, yz_zx_xy );
-    diff = spu_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = spu_mul( invSqrt, spu_splats(0.5f) );
-    res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
-    res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
-    res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
-    res3 = diff;
-    res0 = spu_sel( res0, radicand, select_x );
-    res1 = spu_sel( res1, radicand, select_y );
-    res2 = spu_sel( res2, radicand, select_z );
-    res3 = spu_sel( res3, radicand, select_w );
-    res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) );
-    res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
-    res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
-    res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
-
-    /* determine case and select answer */
-
-    xx = spu_shuffle( col0, col0, shuffle_xxxx );
-    yy = spu_shuffle( col1, col1, shuffle_yyyy );
-    zz = spu_shuffle( col2, col2, shuffle_zzzz );
-    res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) );
-    res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) );
-    res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) );
-    mVec128 = res;
-}
-
-inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    mcol0 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
-    mcol1 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
-    mcol2 = spu_shuffle( tmp1, mat.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    res = spu_mul( mcol0, xxxx );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    res = spu_madd( mcol1, yyyy, res );
-    res = spu_madd( mcol2, zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 crossMatrix( Vector3 vec )
-{
-    vec_float4 neg, res0, res1, res2;
-    neg = negatef4( vec.get128() );
-    res0 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_0ZB0 );
-    res1 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_C0X0 );
-    res2 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_YA00 );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B }) /* spu_shuffle patterns, one selector per output word; going by the lane comments in inverse( Matrix4 ), X/Y/Z/W pick words 0..3 of the first operand and A/B/C/D words 0..3 of the second; 0 presumably produces a zeroed word -- confirm against _VECTORMATH_SHUF_0 */
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
+#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
+#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
+#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
+#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_PI_OVER_2 1.570796327f /* pi / 2 as a single-precision literal */
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )
+    : mCol0( mat.mCol0 ),   // copy constructor: each column is copied from `mat`
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 )
+{
+}
+
+inline Matrix3::Matrix3( float scalar )
+    : mCol0( scalar ),      // every column is built from the same scalar
+      mCol1( scalar ),
+      mCol2( scalar )
+{
+}
+
+inline Matrix3::Matrix3( Quat unitQuat ) // builds the three columns from a quaternion (x, y, z, w); the name says it should be unit length, nothing here normalizes it
+{
+    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); // byte indices 12..15 repeated: broadcasts word 3 (w)
+    vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000); // all-ones in word 0 only
+    vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0); // all-ones in word 2 only
+    xyzw_2 = spu_add( unitQuat.get128(), unitQuat.get128() ); // (2x, 2y, 2z, 2w)
+    wwww = spu_shuffle( unitQuat.get128(), unitQuat.get128(), shuffle_wwww ); // (w, w, w, w)
+    yzxw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_YZXW ); // (y, z, x, w)
+    zxyw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_ZXYW ); // (z, x, y, w)
+    yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW ); // (2y, 2z, 2x, 2w)
+    zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW ); // (2z, 2x, 2y, 2w)
+    tmp0 = spu_mul( yzxw_2, wwww ); // (2yw, 2zw, 2xw, -)
+    tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) ); // (1-2yy, 1-2zz, 1-2xx, -)
+    tmp2 = spu_mul( yzxw, xyzw_2 ); // (2xy, 2yz, 2xz, -)
+    tmp0 = spu_madd( zxyw, xyzw_2, tmp0 ); // (2xz+2yw, 2xy+2zw, 2yz+2xw, -)
+    tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 ); // (1-2yy-2zz, 1-2zz-2xx, 1-2xx-2yy, -): the diagonal terms
+    tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 ); // (2xy-2zw, 2yz-2xw, 2xz-2yw, -)
+    tmp3 = spu_sel( tmp0, tmp1, select_x ); // word 0 from tmp1, the rest from tmp0
+    tmp4 = spu_sel( tmp1, tmp2, select_x ); // word 0 from tmp2, the rest from tmp1
+    tmp5 = spu_sel( tmp2, tmp0, select_x ); // word 0 from tmp0, the rest from tmp2
+    mCol0 = Vector3( spu_sel( tmp3, tmp2, select_z ) ); // (1-2yy-2zz, 2xy+2zw, 2xz-2yw)
+    mCol1 = Vector3( spu_sel( tmp4, tmp0, select_z ) ); // (2xy-2zw, 1-2zz-2xx, 2yz+2xw)
+    mCol2 = Vector3( spu_sel( tmp5, tmp1, select_z ) ); // (2xz+2yw, 2yz-2xw, 1-2xx-2yy)
+}
+
+inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )
+    : mCol0( _col0 ),       // the arguments become columns 0, 1 and 2 in that order
+      mCol1( _col1 ),
+      mCol2( _col2 )
+{
+}
+
+inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )
+{
+    this->mCol0 = _col0;    // overwrite column 0 only
+    return *this;           // reference to this matrix, so setters can be chained
+}
+
+inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )
+{
+    this->mCol1 = _col1;    // overwrite column 1 only
+    return *this;           // reference to this matrix, so setters can be chained
+}
+
+inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )
+{
+    this->mCol2 = _col2;    // overwrite column 2 only
+    return *this;           // reference to this matrix, so setters can be chained
+}
+
+inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )
+{
+    (&mCol0)[col] = vec;    // columns are addressed as an array starting at mCol0; `col` is not range-checked
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )
+{
+    setElem( 0, row, vec.getElem( 0 ) );    // element i of `vec` goes to (column i, row `row`)
+    setElem( 1, row, vec.getElem( 1 ) );
+    setElem( 2, row, vec.getElem( 2 ) );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )
+{
+    ( &mCol0 + col )->setElem( row, val );  // pick the column relative to mCol0, then write one element of it
+    return *this;
+}
+
+inline float Matrix3::getElem( int col, int row ) const
+{
+    return (*this)[col].getElem( row );     // const operator[] yields a copy of the column; read one element from it
+}
+
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return this->mCol0;     // returned by value: a copy of column 0
+}
+
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return this->mCol1;     // returned by value: a copy of column 1
+}
+
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return this->mCol2;     // returned by value: a copy of column 2
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const
+{
+    return (&mCol0)[col];   // copy of column `col`, addressed relative to mCol0; `col` is not range-checked
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    return Vector3( getElem( 0, row ), getElem( 1, row ), getElem( 2, row ) );  // one element from each column
+}
+
+inline Vector3 & Matrix3::operator []( int col )
+{
+    return (&mCol0)[col];   // mutable reference to column `col`, addressed relative to mCol0; no range check
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const
+{
+    return (&mCol0)[col];   // const overload: returns a copy of column `col`; no range check
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    this->mCol0 = mat.getCol0( );   // column-by-column copy from `mat`
+    this->mCol1 = mat.getCol1( );
+    this->mCol2 = mat.getCol2( );
+    return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    // Two-stage interleave: columns 0 and 2 are merged first, then column 1 is woven in
+    // between them, so each result column holds one row of `mat`.
+    vec_float4 c0 = mat.getCol0().get128(), c1 = mat.getCol1().get128(), c2 = mat.getCol2().get128();
+    vec_float4 lo02 = spu_shuffle( c0, c2, _VECTORMATH_SHUF_XAYB );     // c0.x c2.x c0.y c2.y
+    vec_float4 hi02 = spu_shuffle( c0, c2, _VECTORMATH_SHUF_ZCWD );     // c0.z c2.z c0.w c2.w
+    vec_float4 row0 = spu_shuffle( lo02, c1, _VECTORMATH_SHUF_XAYB );   // c0.x c1.x c2.x c1.y
+    vec_float4 row1 = spu_shuffle( lo02, c1, _VECTORMATH_SHUF_ZBW0 );   // c0.y c1.y c2.y 0
+    vec_float4 row2 = spu_shuffle( hi02, c1, _VECTORMATH_SHUF_XCY0 );   // c0.z c1.z c2.z 0
+    // The "0" word above assumes _VECTORMATH_SHUF_0 selects a zero -- TODO confirm; the
+    // fourth word is outside the three elements the other Matrix3 code here uses.
+    return Matrix3( Vector3( row0 ), Vector3( row1 ), Vector3( row2 ) );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat )
+{
+    // Adjugate over determinant: the cross products of column pairs are the rows of the
+    // unscaled inverse, so they are transposed into columns and multiplied by 1/det.
+    // No singularity check is made: the determinant goes straight into recipf4.
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);    // broadcast word 0, built like the other patterns in this file
+    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
+    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
+    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
+    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );     // (col0 x col1) . col2 = determinant
+    dot = spu_shuffle( dot, dot, shuffle_xxxx );            // replicate word 0 into every word
+    invdet = recipf4( dot );
+    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
+    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
+    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
+    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
+    inv0 = spu_mul( inv0, invdet );
+    inv1 = spu_mul( inv1, invdet );
+    inv2 = spu_mul( inv2, invdet );
+    return Matrix3( Vector3( inv0 ), Vector3( inv1 ), Vector3( inv2 ) );
+}
+
+inline float determinant( const Matrix3 & mat )
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); // scalar triple product: (col0 x col1) . col2
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    // Column-wise sum; neither operand is modified.
+    const Vector3 sum0 = mCol0 + mat.mCol0;
+    const Vector3 sum1 = mCol1 + mat.mCol1;
+    const Vector3 sum2 = mCol2 + mat.mCol2;
+    return Matrix3( sum0, sum1, sum2 );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    // Column-wise difference (this minus mat); neither operand is modified.
+    const Vector3 diff0 = mCol0 - mat.mCol0;
+    const Vector3 diff1 = mCol1 - mat.mCol1;
+    const Vector3 diff2 = mCol2 - mat.mCol2;
+    return Matrix3( diff0, diff1, diff2 );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    // Form the sum with the binary operator, store it back, and return this matrix.
+    return ( *this = *this + mat );
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    // Form the difference with the binary operator, store it back, and return this matrix.
+    return ( *this = *this - mat );
+}
+
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    // Negate each column; this matrix is left untouched.
+    const Vector3 neg0 = -mCol0;
+    const Vector3 neg1 = -mCol1;
+    const Vector3 neg2 = -mCol2;
+    return Matrix3( neg0, neg1, neg2 );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    // Apply the Vector3 absPerElem overload to each column in turn.
+    const Vector3 abs0 = absPerElem( mat.getCol0() );
+    const Vector3 abs1 = absPerElem( mat.getCol1() );
+    const Vector3 abs2 = absPerElem( mat.getCol2() );
+    return Matrix3( abs0, abs1, abs2 );
+}
+
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{
+    // Multiply every column by the same scalar.
+    const Vector3 scaled0 = mCol0 * scalar;
+    const Vector3 scaled1 = mCol1 * scalar;
+    const Vector3 scaled2 = mCol2 * scalar;
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{
+    // Scale with the binary operator, store the result back, and return this matrix.
+    return ( *this = *this * scalar );
+}
+
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+    return mat * scalar; // scalar-on-the-left form; forwards to Matrix3::operator *( float )
+}
+
+inline const Vector3 Matrix3::operator *( Vector3 vec ) const
+{
+    // Matrix * vector as a combination of columns: col0*vec.x + col1*vec.y + col2*vec.z.
+    vec_float4 v = vec.get128();
+    vec_uchar16 splatWord0 = (vec_uchar16)spu_splats((int)0x00010203);   // byte indices 0..3 repeated
+    vec_uchar16 splatWord1 = (vec_uchar16)spu_splats((int)0x04050607);   // byte indices 4..7 repeated
+    vec_uchar16 splatWord2 = (vec_uchar16)spu_splats((int)0x08090a0b);   // byte indices 8..11 repeated
+    vec_float4 vx = spu_shuffle( v, v, splatWord0 );
+    vec_float4 vy = spu_shuffle( v, v, splatWord1 );
+    vec_float4 vz = spu_shuffle( v, v, splatWord2 );
+    vec_float4 acc = spu_mul( mCol0.get128(), vx );
+    acc = spu_madd( mCol1.get128(), vy, acc );
+    acc = spu_madd( mCol2.get128(), vz, acc );
+    return Vector3( acc );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    // Column j of the product is this matrix applied to column j of `mat`.
+    const Vector3 prod0 = *this * mat.mCol0;
+    const Vector3 prod1 = *this * mat.mCol1;
+    const Vector3 prod2 = *this * mat.mCol2;
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    // The product (this * mat) is completed as a temporary before it is stored back.
+    return ( *this = *this * mat );
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    // Per-column call to the Vector3 mulPerElem overload (not a matrix product).
+    const Vector3 had0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 had1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 had2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( had0, had1, had2 );
+}
+
+inline const Matrix3 Matrix3::identity( )
+{
+    // The columns are the x, y and z axis vectors supplied by Vector3.
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Matrix3( axisX, axisY, axisZ );
+}
+
+inline const Matrix3 Matrix3::rotationX( float radians )
+{
+    // Rotation about the x axis: columns (1, 0, 0), (0, cos, sin), (0, -sin, cos).
+    vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);     // all-ones in word 1
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);     // all-ones in word 2
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinv, cosv;
+    sincosf4( spu_splats(radians), &sinv, &cosv );      // the same angle in all four words
+    // Start each column from zero and drop cos / sin into the y and z words.
+    vec_float4 col1 = spu_sel( zeros, cosv, maskY );
+    col1 = spu_sel( col1, sinv, maskZ );
+    vec_float4 col2 = spu_sel( zeros, negatef4(sinv), maskY );
+    col2 = spu_sel( col2, cosv, maskZ );
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( col1 ),
+        Vector3( col2 )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationY( float radians )
+{
+    // Rotation about the y axis: columns (cos, 0, -sin), (0, 1, 0), (sin, 0, cos).
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);     // all-ones in word 0
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);     // all-ones in word 2
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinv, cosv;
+    sincosf4( spu_splats(radians), &sinv, &cosv );      // the same angle in all four words
+    // Start each column from zero and drop cos / sin into the x and z words.
+    vec_float4 col0 = spu_sel( zeros, cosv, maskX );
+    col0 = spu_sel( col0, negatef4(sinv), maskZ );
+    vec_float4 col2 = spu_sel( zeros, sinv, maskX );
+    col2 = spu_sel( col2, cosv, maskZ );
+    return Matrix3(
+        Vector3( col0 ),
+        Vector3::yAxis( ),
+        Vector3( col2 )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{
+    // Rotation about the z axis: columns (cos, sin, 0), (-sin, cos, 0), (0, 0, 1).
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);     // all-ones in word 0
+    vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);     // all-ones in word 1
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinv, cosv;
+    sincosf4( spu_splats(radians), &sinv, &cosv );      // the same angle in all four words
+    // Start each column from zero and drop cos / sin into the x and y words.
+    vec_float4 col0 = spu_sel( zeros, cosv, maskX );
+    col0 = spu_sel( col0, sinv, maskY );
+    vec_float4 col1 = spu_sel( zeros, negatef4(sinv), maskX );
+    col1 = spu_sel( col1, cosv, maskY );
+    return Matrix3(
+        Vector3( col0 ),
+        Vector3( col1 ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ ) // rotation from three per-axis angles (x, y, z); the columns below work out to Rz * Ry * Rx
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // byte indices 0..3 repeated: broadcasts word 0
+    angles = radiansXYZ.get128();
+    angles = spu_insert( 0.0f, angles, 3 ); // word 3 := 0, so its sine is 0 and its cosine is 1
+    sincosf4( angles, &s, &c ); // s = (sx, sy, sz, 0), c = (cx, cy, cz, 1)
+    negS = negatef4( s );
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 ); // (cz, sz, 1, 0) -- assumes X..W index the first operand, A..D the second, 0 gives zero
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 ); // (-sz, cz, -0, 0)
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 ); // (cy, cy, -sy, 0)
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 ); // (sy, sy, cy, 0)
+    X0 = spu_shuffle( s, s, shuffle_xxxx ); // sx in every word
+    X1 = spu_shuffle( c, c, shuffle_xxxx ); // cx in every word
+    tmp = spu_mul( Z0, Y1 ); // (cz*sy, sz*sy, cy, 0)
+    return Matrix3(
+        Vector3( spu_mul( Z0, Y0 ) ), // (cz*cy, sz*cy, -sy)
+        Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ), // (cz*sy*sx - sz*cx, sz*sy*sx + cz*cx, cy*sx)
+        Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ) // (cz*sy*cx + sz*sx, sz*sy*cx - cz*sx, cy*cx)
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec ) // axis-angle rotation; the name says unitVec should be unit length, nothing here normalizes it
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // broadcast word 0
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); // broadcast word 1
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); // broadcast word 2
+    axis = unitVec.get128();
+    sincosf4( spu_splats( radians ), &s, &c ); // sin and cos of the angle in every word
+    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
+    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
+    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    axisS = spu_mul( axis, s ); // (ax*s, ay*s, az*s, -)
+    negAxisS = negatef4( axisS );
+    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 ); // (0, az*s, -ay*s, 0) -- assumes X..W index the first operand, A..D the second
+    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 ); // (-az*s, 0, ax*s, 0)
+    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 ); // (ay*s, -ax*s, 0, 0)
+    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) ); // cos goes on the diagonal: word 0 of column 0
+    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) ); // word 1 of column 1
+    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) ); // word 2 of column 2
+    return Matrix3(
+        Vector3( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ), // axis*ax*(1-cos) + tmp0
+        Vector3( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ), // axis*ay*(1-cos) + tmp1
+        Vector3( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) ) // axis*az*(1-cos) + tmp2
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( Quat unitQuat )
+{
+    return Matrix3( unitQuat ); // named-constructor form of Matrix3( Quat )
+}
+
+inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
+{
+    // Diagonal matrix: column i keeps only word i of scaleVec, every other word is zero.
+    vec_float4 zeros = spu_splats(0.0f), factors = scaleVec.get128();
+    const Vector3 diag0( spu_sel( zeros, factors, (vec_uint4)spu_maskb(0xf000) ) );
+    const Vector3 diag1( spu_sel( zeros, factors, (vec_uint4)spu_maskb(0x0f00) ) );
+    const Vector3 diag2( spu_sel( zeros, factors, (vec_uint4)spu_maskb(0x00f0) ) );
+    return Matrix3( diag0, diag1, diag2 );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
+{
+    // Column i of `mat` is multiplied by component i of scaleVec.
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
+{
+    // Every column is multiplied per element by scaleVec, i.e. row i is scaled by component i.
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    // Forwards column by column to the Vector3 select overload (presumably mat1 when select1 is true).
+    const Vector3 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( pick0, pick1, pick2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat )
+{
+    // Debug helper: emit rows 0, 1, 2 in order through the Vector3 print overload.
+    for ( int row = 0; row < 3; ++row )
+        print( mat.getRow( row ) );
+}
+
+inline void print( const Matrix3 & mat, const char * name )
+{
+    printf("%s:\n", name); // label line first; `name` is used as a C string and is not checked for null
+    print( mat ); // then the rows, via the unlabeled overload
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat )
+    : mCol0( mat.mCol0 ),   // copy constructor: each column is copied from `mat`
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 ),
+      mCol3( mat.mCol3 )
+{
+}
+
+inline Matrix4::Matrix4( float scalar )
+    : mCol0( scalar ),      // every column is built from the same scalar
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat )
+    : mCol0( mat.getCol0(), 0.0f ),     // fourth element of columns 0..2 is 0
+      mCol1( mat.getCol1(), 0.0f ),
+      mCol2( mat.getCol2(), 0.0f ),
+      mCol3( mat.getCol3(), 1.0f )      // fourth element of column 3 is 1
+{
+}
+
+inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
+    : mCol0( _col0 ),       // the arguments become columns 0..3 in that order
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
+    : mCol0( mat.getCol0(), 0.0f ),     // 3x3 part from `mat`, fourth element 0
+      mCol1( mat.getCol1(), 0.0f ),
+      mCol2( mat.getCol2(), 0.0f ),
+      mCol3( translateVec, 1.0f )       // translation column, fourth element 1
+{
+}
+
+inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
+{
+    // The 3x3 part comes from the quaternion; the fourth row ends up as (0, 0, 0, 1).
+    const Matrix3 rot( unitQuat );      // constructed directly rather than default-constructed and then assigned
+    mCol0 = Vector4( rot.getCol0(), 0.0f );
+    mCol1 = Vector4( rot.getCol1(), 0.0f );
+    mCol2 = Vector4( rot.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
+{
+    this->mCol0 = _col0;    // overwrite column 0 only
+    return *this;           // reference to this matrix, so setters can be chained
+}
+
+inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
+{
+    this->mCol1 = _col1;    // overwrite column 1 only
+    return *this;           // reference to this matrix, so setters can be chained
+}
+
+inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
+{
+    this->mCol2 = _col2;    // overwrite column 2 only
+    return *this;           // reference to this matrix, so setters can be chained
+}
+
+inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
+{
+    this->mCol3 = _col3;    // overwrite column 3 only
+    return *this;           // reference to this matrix, so setters can be chained
+}
+
+inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
+{
+    (&mCol0)[col] = vec;    // columns are addressed as an array starting at mCol0; `col` is not range-checked
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
+{
+    setElem( 0, row, vec.getElem( 0 ) );    // element i of `vec` goes to (column i, row `row`)
+    setElem( 1, row, vec.getElem( 1 ) );
+    setElem( 2, row, vec.getElem( 2 ) );
+    setElem( 3, row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    ( &mCol0 + col )->setElem( row, val );  // pick the column relative to mCol0, then write one element of it
+    return *this;
+}
+
+inline float Matrix4::getElem( int col, int row ) const
+{
+    return (*this)[col].getElem( row );     // const operator[] yields a copy of the column; read one element from it
+}
+
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return this->mCol0;     // returned by value: a copy of column 0
+}
+
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return this->mCol1;     // returned by value: a copy of column 1
+}
+
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return this->mCol2;     // returned by value: a copy of column 2
+}
+
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return this->mCol3;     // returned by value: a copy of column 3
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    return (&mCol0)[col];   // copy of column `col`, addressed relative to mCol0; `col` is not range-checked
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4( getElem( 0, row ), getElem( 1, row ), getElem( 2, row ), getElem( 3, row ) );  // one element from each column
+}
+
+inline Vector4 & Matrix4::operator []( int col )
+{
+    return (&mCol0)[col];   // mutable reference to column `col`, addressed relative to mCol0; no range check
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    return (&mCol0)[col];   // const overload: returns a copy of column `col`; no range check
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    this->mCol0 = mat.getCol0( );   // column-by-column copy from `mat`
+    this->mCol1 = mat.getCol1( );
+    this->mCol2 = mat.getCol2( );
+    this->mCol3 = mat.getCol3( );
+    return *this;
+}
+
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    // 4x4 transpose in two interleave stages; each result column is one row of `mat`.
+    vec_float4 c0 = mat.getCol0().get128();
+    vec_float4 c1 = mat.getCol1().get128();
+    vec_float4 c2 = mat.getCol2().get128();
+    vec_float4 c3 = mat.getCol3().get128();
+    vec_float4 lo02 = spu_shuffle( c0, c2, _VECTORMATH_SHUF_XAYB );     // c0.x c2.x c0.y c2.y
+    vec_float4 lo13 = spu_shuffle( c1, c3, _VECTORMATH_SHUF_XAYB );     // c1.x c3.x c1.y c3.y
+    vec_float4 hi02 = spu_shuffle( c0, c2, _VECTORMATH_SHUF_ZCWD );     // c0.z c2.z c0.w c2.w
+    vec_float4 hi13 = spu_shuffle( c1, c3, _VECTORMATH_SHUF_ZCWD );     // c1.z c3.z c1.w c3.w
+    // Second stage: weave the even-column and odd-column halves together.
+    vec_float4 row0 = spu_shuffle( lo02, lo13, _VECTORMATH_SHUF_XAYB ); // c0.x c1.x c2.x c3.x
+    vec_float4 row1 = spu_shuffle( lo02, lo13, _VECTORMATH_SHUF_ZCWD ); // c0.y c1.y c2.y c3.y
+    vec_float4 row2 = spu_shuffle( hi02, hi13, _VECTORMATH_SHUF_XAYB ); // c0.z c1.z c2.z c3.z
+    vec_float4 row3 = spu_shuffle( hi02, hi13, _VECTORMATH_SHUF_ZCWD ); // c0.w c1.w c2.w c3.w
+    return Matrix4( Vector4( row0 ), Vector4( row1 ), Vector4( row2 ), Vector4( row3 ) );
+}
+
+inline const Matrix4 inverse( const Matrix4 & mat ) // general 4x4 inverse: cofactor vectors scaled by 1/determinant; there is no singularity check
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 in0, in1, in2, in3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    vec_float4 cof0, cof1, cof2, cof3;
+    vec_float4 t0, t1, t2, t3;
+    vec_float4 t01, t02, t03, t12, t23;
+    vec_float4 t1r, t2r;
+    vec_float4 t01r, t02r, t03r, t12r, t23r;
+    vec_float4 t1r3, t1r3r;
+    vec_float4 det, det1, det2, det3, invdet;
+    in0 = mat.getCol0().get128(); // the lane comments below label in0 as "A B C D", in1 as "E F G H", and so on
+    in1 = mat.getCol1().get128();
+    in2 = mat.getCol2().get128();
+    in3 = mat.getCol3().get128();
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
+    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
+    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
+    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
+    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
+    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
+    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
+    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
+    /* Generate a cofactor matrix. The computed cofactors reside in
+     * cof0, cof1, cof2, cof3.
+     */
+    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
+    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
+    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
+    cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
+    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
+    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
+    cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
+    cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
+
+    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
+    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
+    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
+    cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
+    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
+    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
+    cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
+    cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
+    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
+    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
+    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
+    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
+    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
+    cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
+    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
+    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
+    cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
+    cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
+    t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
+    t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
+    cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
+    cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
+    t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
+    cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
+    cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
+    t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
+    t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
+    cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
+    cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
+    t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
+    cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
+    cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
+    t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
+    t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* EO AK MG IC */
+    cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
+    cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
+    t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
+    cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
+    cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
+    /* Compute the determinant of the matrix
+     *
+     * det = sum_across(t0 * cof0);
+     *
+     * We perform a sum across the entire vector so that
+     * we don't have to splat the result when multiplying the
+     * cofactors by the inverse of the determinant.
+     */
+    det  = spu_mul(t0, cof0);
+    det1 = spu_rlqwbyte(det, 4); // the three rotations line up the other three products with each word
+    det2 = spu_rlqwbyte(det, 8);
+    det3 = spu_rlqwbyte(det, 12);
+    det  = spu_add(det, det1);
+    det2 = spu_add(det2, det3);
+    det  = spu_add(det, det2); // every word now holds the full four-term sum
+    /* Compute the reciprocal of the determinant.
+     */
+    invdet = recipf4(det); // det is used as-is; a zero determinant is not detected here
+    /* Multiply the cofactors by the reciprocal of the determinant.
+     */
+    return Matrix4(
+        Vector4( spu_mul(cof0, invdet) ),
+        Vector4( spu_mul(cof1, invdet) ),
+        Vector4( spu_mul(cof2, invdet) ),
+        Vector4( spu_mul(cof3, invdet) )
+    );
+}
+
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    Transform3 xform;   // receives only the XYZ part of each column of `mat`
+    xform.setCol0( mat.getCol0().getXYZ( ) );
+    xform.setCol1( mat.getCol1().getXYZ( ) );
+    xform.setCol2( mat.getCol2().getXYZ( ) );
+    xform.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( inverse( xform ) );     // invert as a Transform3, then widen back to Matrix4
+}
+
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    Transform3 xform;   // receives only the XYZ part of each column of `mat`
+    xform.setCol0( mat.getCol0().getXYZ( ) );
+    xform.setCol1( mat.getCol1().getXYZ( ) );
+    xform.setCol2( mat.getCol2().getXYZ( ) );
+    xform.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( orthoInverse( xform ) );    // Transform3 overload does the work; result is widened back to Matrix4
+}
+
+inline float determinant( const Matrix4 & mat ) // 4x4 determinant as the dot product of t0 with the cofactor vector cof0
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 in0, in1, in2, in3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    vec_float4 cof0;
+    vec_float4 t0, t1, t2, t3;
+    vec_float4 t12, t23;
+    vec_float4 t1r, t2r;
+    vec_float4 t12r, t23r;
+    vec_float4 t1r3, t1r3r;
+    in0 = mat.getCol0().get128(); // the lane comments below label in0 as "A B C D", in1 as "E F G H", and so on
+    in1 = mat.getCol1().get128();
+    in2 = mat.getCol2().get128();
+    in3 = mat.getCol3().get128();
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
+    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
+    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
+    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
+    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
+    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
+    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
+    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
+    /* Generate the cofactors needed for the determinant. Only the
+     * first cofactor vector, cof0, is computed in this function.
+     */
+    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
+    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
+    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
+    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
+    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
+
+    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
+    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
+    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
+    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
+    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
+    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
+    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
+    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
+    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
+    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
+    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
+    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
+    return spu_extract( _vmathVfDot4(t0,cof0), 0 ); // four-term dot product of t0 and cof0, read back from word 0
+}
+
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    // Column-wise sum; neither operand is modified.
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    // Column-wise difference (this minus mat); neither operand is modified.
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    // Form the sum with the binary operator, store it back, and return this matrix.
+    return ( *this = *this + mat );
+}
+
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    // Form the difference with the binary operator, store it back, and return this matrix.
+    return ( *this = *this - mat );
+}
+
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    // Negate each column; this matrix is left untouched.
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    // Apply the Vector4 absPerElem overload to each column in turn.
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    // Multiply every column by the same scalar.
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    // Scale with the binary operator, store the result back, and return this matrix.
+    return ( *this = *this * scalar );
+}
+
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    return mat * scalar; // scalar-on-the-left form; forwards to Matrix4::operator *( float )
+}
+
+inline const Vector4 Matrix4::operator *( Vector4 vec ) const
+{
+    // Matrix * vector as a combination of columns, accumulated in two independent
+    // chains (col0*x + col2*z) and (col1*y + col3*w) that are added at the end.
+    vec_float4 v = vec.get128();
+    vec_uchar16 splatWord0 = (vec_uchar16)spu_splats((int)0x00010203);   // byte indices 0..3 repeated
+    vec_uchar16 splatWord1 = (vec_uchar16)spu_splats((int)0x04050607);   // byte indices 4..7 repeated
+    vec_uchar16 splatWord2 = (vec_uchar16)spu_splats((int)0x08090a0b);   // byte indices 8..11 repeated
+    vec_uchar16 splatWord3 = (vec_uchar16)spu_splats((int)0x0c0d0e0f);   // byte indices 12..15 repeated
+    vec_float4 vx = spu_shuffle( v, v, splatWord0 );
+    vec_float4 vy = spu_shuffle( v, v, splatWord1 );
+    vec_float4 vz = spu_shuffle( v, v, splatWord2 );
+    vec_float4 vw = spu_shuffle( v, v, splatWord3 );
+    vec_float4 accXZ = spu_mul( mCol0.get128(), vx );
+    vec_float4 accYW = spu_mul( mCol1.get128(), vy );
+    accXZ = spu_madd( mCol2.get128(), vz, accXZ );
+    accYW = spu_madd( mCol3.get128(), vw, accYW );
+    return Vector4( spu_add( accXZ, accYW ) );
+}
+
+/* Matrix * 3-D vector: returns col0*x + col1*y + col2*z as a Vector4. */
+/* The fourth column does not contribute to the result.                */
+inline const Vector4 Matrix4::operator *( Vector3 vec ) const
+{
+    /* shuffle patterns that copy one 32-bit word of the source into all four words */
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 vx = spu_shuffle( vec.get128(), vec.get128(), splatX );
+    vec_float4 vy = spu_shuffle( vec.get128(), vec.get128(), splatY );
+    vec_float4 vz = spu_shuffle( vec.get128(), vec.get128(), splatZ );
+    /* single accumulation chain: ((col0*x) + col1*y) + col2*z */
+    vec_float4 acc = spu_madd( mCol1.get128(), vy, spu_mul( mCol0.get128(), vx ) );
+    return Vector4( spu_madd( mCol2.get128(), vz, acc ) );
+}
+
+/* Matrix * 3-D point: returns col0*x + col1*y + col2*z + col3 as a  */
+/* Vector4, i.e. the fourth column is added unscaled.                */
+inline const Vector4 Matrix4::operator *( Point3 pnt ) const
+{
+    /* shuffle patterns that copy one 32-bit word of the source into all four words */
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 px = spu_shuffle( pnt.get128(), pnt.get128(), splatX );
+    vec_float4 py = spu_shuffle( pnt.get128(), pnt.get128(), splatY );
+    vec_float4 pz = spu_shuffle( pnt.get128(), pnt.get128(), splatZ );
+    /* two independent chains: (col0*x + col2*z) and (col3 + col1*y) */
+    vec_float4 accXZ = spu_madd( mCol2.get128(), pz, spu_mul( mCol0.get128(), px ) );
+    vec_float4 accYT = spu_add( mCol3.get128(), spu_mul( mCol1.get128(), py ) );
+    return Vector4( spu_add( accXZ, accYT ) );
+}
+
+/* Matrix * matrix: each result column is this matrix times the */
+/* corresponding column of mat.                                 */
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 prod0 = *this * mat.mCol0;
+    const Vector4 prod1 = *this * mat.mCol1;
+    const Vector4 prod2 = *this * mat.mCol2;
+    const Vector4 prod3 = *this * mat.mCol3;
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+/* In-place matrix multiply: replaces this matrix with ( *this * mat ). */
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    const Matrix4 product = *this * mat;
+    *this = product;
+    return *this;
+}
+
+/* Matrix4 * Transform3: the first three columns of tfrm are multiplied  */
+/* as 3-D vectors and its fourth column as a 3-D point (so this matrix's */
+/* own fourth column is added only to the last result column).          */
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 prod0 = *this * tfrm.getCol0();
+    const Vector4 prod1 = *this * tfrm.getCol1();
+    const Vector4 prod2 = *this * tfrm.getCol2();
+    const Vector4 prod3 = *this * Point3( tfrm.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+/* In-place multiply by a Transform3: replaces this matrix with ( *this * tfrm ). */
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    const Matrix4 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+/* Applies the Vector4 mulPerElem to each pair of corresponding columns */
+/* of mat0 and mat1 and returns the resulting matrix.                   */
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+/* Returns the matrix whose columns are the x, y, z and w axis vectors. */
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 axisX = Vector4::xAxis( );
+    const Vector4 axisY = Vector4::yAxis( );
+    const Vector4 axisZ = Vector4::zAxis( );
+    const Vector4 axisW = Vector4::wAxis( );
+    return Matrix4( axisX, axisY, axisZ, axisW );
+}
+
+/* Overwrites the xyz part of columns 0..2 with the columns of mat3. */
+/* Column 3 is not touched. Returns *this.                           */
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    const Vector3 upper0 = mat3.getCol0();
+    const Vector3 upper1 = mat3.getCol1();
+    const Vector3 upper2 = mat3.getCol2();
+    mCol0.setXYZ( upper0 );
+    mCol1.setXYZ( upper1 );
+    mCol2.setXYZ( upper2 );
+    return *this;
+}
+
+/* Returns a Matrix3 built from the xyz parts of columns 0..2. */
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+/* Overwrites the xyz part of column 3 with translateVec. Returns *this. */
+inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
+{
+    this->mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+/* Returns the xyz part of column 3. */
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 translateVec = mCol3.getXYZ( );
+    return translateVec;
+}
+
+/* Builds a rotation about the x axis from an angle passed to sincosf4.  */
+/* Column 1 is (0, cos, sin, 0), column 2 is (0, -sin, cos, 0); columns  */
+/* 0 and 3 are the x and w axis vectors.                                 */
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col1 = spu_sel( spu_sel( zeros, cosA, maskY ), sinA, maskZ );
+    vec_float4 col2 = spu_sel( spu_sel( zeros, negatef4(sinA), maskY ), cosA, maskZ );
+    return Matrix4( Vector4::xAxis( ), Vector4( col1 ), Vector4( col2 ), Vector4::wAxis( ) );
+}
+
+/* Builds a rotation about the y axis from an angle passed to sincosf4.  */
+/* Column 0 is (cos, 0, -sin, 0), column 2 is (sin, 0, cos, 0); columns  */
+/* 1 and 3 are the y and w axis vectors.                                 */
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col0 = spu_sel( spu_sel( zeros, cosA, maskX ), negatef4(sinA), maskZ );
+    vec_float4 col2 = spu_sel( spu_sel( zeros, sinA, maskX ), cosA, maskZ );
+    return Matrix4( Vector4( col0 ), Vector4::yAxis( ), Vector4( col2 ), Vector4::wAxis( ) );
+}
+
+/* Builds a rotation about the z axis from an angle passed to sincosf4.  */
+/* Column 0 is (cos, sin, 0, 0), column 1 is (-sin, cos, 0, 0); columns  */
+/* 2 and 3 are the z and w axis vectors.                                 */
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col0 = spu_sel( spu_sel( zeros, cosA, maskX ), sinA, maskY );
+    vec_float4 col1 = spu_sel( spu_sel( zeros, negatef4(sinA), maskX ), cosA, maskY );
+    return Matrix4( Vector4( col0 ), Vector4( col1 ), Vector4::zAxis( ), Vector4::wAxis( ) );
+}
+
+/* Builds a rotation matrix from three angles packed in radiansXYZ        */
+/* (one angle per lane; sin and cos of all of them are taken at once).    */
+/* NOTE(review): the per-lane layout of Z0/Z1/Y0/Y1 depends on the        */
+/* _VECTORMATH_SHUF_CZD0 / _VECTORMATH_SHUF_BBY0 patterns, which are      */
+/* defined outside this section -- confirm against their definitions.     */
+/* The fourth column of the result is the w axis.                         */
+inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    angles = radiansXYZ.get128();
+    /* force lane 3 to 0 so that lane holds sin = 0 and cos = 1 after sincosf4 */
+    angles = spu_insert( 0.0f, angles, 3 );
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
+    /* sin and cos of the x angle, replicated to every lane */
+    X0 = spu_shuffle( s, s, shuffle_xxxx );
+    X1 = spu_shuffle( c, c, shuffle_xxxx );
+    tmp = spu_mul( Z0, Y1 );
+    return Matrix4(
+        Vector4( spu_mul( Z0, Y0 ) ),
+        /* Z1*X1 + tmp*X0 */
+        Vector4( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
+        /* tmp*X1 - Z1*X0 */
+        Vector4( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
+        Vector4::wAxis( )
+    );
+}
+
+/* Builds a rotation by the given angle about the axis unitVec.           */
+/* The parameter name suggests unitVec must be unit length; nothing here  */
+/* normalizes or checks it. Each of the first three result columns is     */
+/* axis * axis[i] * (1 - cos) + tmp_i, and the fourth column is the       */
+/* w axis.                                                                */
+/* NOTE(review): the exact lane layout of tmp0..tmp2 depends on the       */
+/* _VECTORMATH_SHUF_0ZB0 / C0X0 / YA00 patterns defined elsewhere.        */
+inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    axis = unitVec.get128();
+    sincosf4( spu_splats( radians ), &s, &c );
+    /* each axis component replicated to all lanes */
+    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
+    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
+    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    axisS = spu_mul( axis, s );
+    negAxisS = negatef4( axisS );
+    /* gather +/- axis*sin terms for each column */
+    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
+    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
+    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
+    /* place cos on the diagonal lane of each column */
+    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
+    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
+    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
+    /* clear lane 3 of axis so the products below contribute nothing to w */
+    zeroW = (vec_float4)spu_maskb(0x000f);
+    axis = spu_andc( axis, zeroW );
+    return Matrix4(
+        Vector4( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector4( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector4( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
+        Vector4::wAxis( )
+    );
+}
+
+/* Builds the rotation as a Transform3 from unitQuat, then converts it */
+/* to a Matrix4.                                                       */
+inline const Matrix4 Matrix4::rotation( Quat unitQuat )
+{
+    const Transform3 rotationTfrm = Transform3::rotation( unitQuat );
+    return Matrix4( rotationTfrm );
+}
+
+/* Builds a scale matrix: column i (i = 0..2) holds lane i of scaleVec */
+/* in lane i and zero elsewhere; column 3 is the w axis.               */
+inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
+{
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 col0 = spu_sel( zeros, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) );
+    vec_float4 col1 = spu_sel( zeros, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) );
+    vec_float4 col2 = spu_sel( zeros, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) );
+    return Matrix4( Vector4( col0 ), Vector4( col1 ), Vector4( col2 ), Vector4::wAxis( ) );
+}
+
+/* Scales columns 0, 1 and 2 of mat by the x, y and z elements of */
+/* scaleVec respectively; column 3 is copied unchanged.           */
+inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
+{
+    const Vector4 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix4( scaled0, scaled1, scaled2, mat.getCol3() );
+}
+
+/* Multiplies every column of mat element-wise by ( scaleVec, 1 ), so */
+/* rows 0..2 are scaled by x, y, z and row 3 is multiplied by 1.      */
+inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
+{
+    const Vector4 rowScale = Vector4( scaleVec, 1.0f );
+    const Vector4 scaled0 = mulPerElem( mat.getCol0(), rowScale );
+    const Vector4 scaled1 = mulPerElem( mat.getCol1(), rowScale );
+    const Vector4 scaled2 = mulPerElem( mat.getCol2(), rowScale );
+    const Vector4 scaled3 = mulPerElem( mat.getCol3(), rowScale );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+/* Builds a translation matrix: columns 0..2 are the x, y, z axis */
+/* vectors and column 3 is ( translateVec, 1 ).                   */
+inline const Matrix4 Matrix4::translation( Vector3 translateVec )
+{
+    const Vector4 offsetCol = Vector4( translateVec, 1.0f );
+    return Matrix4( Vector4::xAxis( ), Vector4::yAxis( ), Vector4::zAxis( ), offsetCol );
+}
+
+/* Builds a viewing matrix. An eye frame is constructed with:             */
+/*   z = normalize( eyePos - lookAtPos )                                  */
+/*   x = normalize( cross( normalize( upVec ), z ) )                      */
+/*   y = cross( z, x )                                                    */
+/* and origin eyePos; the result is orthoInverse of that frame.           */
+inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
+{
+    const Vector3 upDir = normalize( upVec );
+    const Vector3 frameZ = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 frameX = normalize( cross( upDir, frameZ ) );
+    const Vector3 frameY = cross( frameZ, frameX );
+    const Matrix4 eyeFrame = Matrix4( Vector4( frameX ), Vector4( frameY ), Vector4( frameZ ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+/* Builds a perspective projection matrix. With                           */
+/*   f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f )               */
+/*   rangeInv = 1 / ( zNear - zFar )                                      */
+/* the non-zero entries are:                                              */
+/*   col0.x = f / aspect                                                  */
+/*   col1.y = f                                                           */
+/*   col2.z = ( zNear + zFar ) * rangeInv,  col2.w = -1                   */
+/*   col3.z = zNear * zFar * rangeInv * 2                                 */
+/* No check is made for zNear == zFar or aspect == 0.                     */
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    float focal = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    float invRange = 1.0f / ( zNear - zFar );
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 c0 = spu_insert( focal / aspect, zeros, 0 );
+    vec_float4 c1 = spu_insert( focal, zeros, 1 );
+    vec_float4 c2 = spu_insert( -1.0f, spu_insert( ( zNear + zFar ) * invRange, zeros, 2 ), 3 );
+    vec_float4 c3 = spu_insert( zNear * zFar * invRange * 2.0f, zeros, 2 );
+    return Matrix4( Vector4( c0 ), Vector4( c1 ), Vector4( c2 ), Vector4( c3 ) );
+}
+
+/* Builds a perspective projection matrix from the six frustum planes.    */
+/* The per-lane quantities are computed together in SIMD registers:       */
+/*   diff = rtn - lbf, sum = rtn + lbf, diagonal = 2*zNear / diff,        */
+/*   column = sum / diff.                                                 */
+/* NOTE(review): the lane order of lbf/rtn depends on                     */
+/* _VECTORMATH_SHUF_XAYB (defined elsewhere); from the variable names it  */
+/* is presumably (left, bottom, far) and (right, top, near) -- confirm.   */
+/* No check is made for degenerate (zero-width) ranges.                   */
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff;
+    vec_float4 diagonal, column, near2;
+    vec_float4 zero = spu_splats(0.0f);
+    /* pack the scalar plane values into two vectors via two interleaving shuffles each */
+    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
+    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
+    diff = spu_sub( rtn, lbf );
+    sum  = spu_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    /* near2 = 2 * zNear in every lane */
+    near2 = spu_splats( zNear );
+    near2 = spu_add( near2, near2 );
+    diagonal = spu_mul( near2, inv_diff );
+    column = spu_mul( sum, inv_diff );
+    return Matrix4(
+        /* col0: diagonal in lane 0 only */
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
+        /* col1: diagonal in lane 1 only */
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
+        /* col2: column in lanes 0..2, -1 in lane 3 */
+        Vector4( spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) ) ),
+        /* col3: diagonal * zFar in lane 2 only */
+        Vector4( spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) ) )
+    );
+}
+
+/* Builds an orthographic projection matrix from the six box planes.      */
+/* The per-lane quantities are computed together in SIMD registers:       */
+/*   diff = rtn - lbf, sum = rtn + lbf, diagonal = 2 / diff,              */
+/*   column = sum * (-1/diff) in lanes 0,1 and sum * (1/diff) in lane 2.  */
+/* NOTE(review): the lane order of lbf/rtn depends on                     */
+/* _VECTORMATH_SHUF_XAYB (defined elsewhere); from the variable names it  */
+/* is presumably (left, bottom, far) and (right, top, near) -- confirm.   */
+/* No check is made for degenerate (zero-width) ranges.                   */
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff, neg_inv_diff;
+    vec_float4 diagonal, column;
+    vec_float4 zero = spu_splats(0.0f);
+    /* pack the scalar plane values into two vectors via two interleaving shuffles each */
+    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
+    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
+    diff = spu_sub( rtn, lbf );
+    sum  = spu_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    /* diagonal = 2 / diff */
+    diagonal = spu_add( inv_diff, inv_diff );
+    /* lanes 0 and 1 use -1/diff, lane 2 uses +1/diff */
+    column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) );
+    return Matrix4(
+        /* columns 0..2: diagonal in lane 0, 1, 2 respectively, zero elsewhere */
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) ) ),
+        /* col3: column in lanes 0..2, 1 in lane 3 */
+        Vector4( spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) ) )
+    );
+}
+
+/* Column-wise select: applies the Vector4 select to each pair of */
+/* corresponding columns of mat0 and mat1 with the same select1.  */
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 picked3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Debug helper: prints rows 0..3 of mat, in order, using the Vector4 print. */
+inline void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+/* Debug helper: prints name followed by ':' and a newline, then the */
+/* matrix itself via print( mat ).                                   */
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf( "%s:\n", name );
+    print( mat );
+}
+
+#endif
+
+/* Copy constructor: copies all four columns of tfrm. */
+inline Transform3::Transform3( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+}
+
+/* Sets all four columns to Vector3( scalar ). */
+inline Transform3::Transform3( float scalar )
+{
+    const Vector3 fill = Vector3( scalar );
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+    mCol3 = fill;
+}
+
+/* Constructs the transform from its four columns. */
+inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
+{
+    this->mCol0 = _col0;
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+    this->mCol3 = _col3;
+}
+
+/* Constructs the transform from a 3x3 matrix (columns 0..2) and a */
+/* translation vector (column 3).                                  */
+inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
+{
+    this->setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+/* Constructs the transform from a quaternion, converted through     */
+/* Matrix3( unitQuat ) into columns 0..2, and a translation vector    */
+/* (column 3).                                                       */
+inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
+{
+    const Matrix3 rotationPart = Matrix3( unitQuat );
+    this->setUpper3x3( rotationPart ).setTranslation( translateVec );
+}
+
+/* Replaces column 0. Returns *this. */
+inline Transform3 & Transform3::setCol0( Vector3 _col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+/* Replaces column 1. Returns *this. */
+inline Transform3 & Transform3::setCol1( Vector3 _col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+/* Replaces column 2. Returns *this. */
+inline Transform3 & Transform3::setCol2( Vector3 _col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+/* Replaces column 3. Returns *this. */
+inline Transform3 & Transform3::setCol3( Vector3 _col3 )
+{
+    this->mCol3 = _col3;
+    return *this;
+}
+
+/* Replaces the column at index col, addressed as an offset from mCol0. */
+/* The index is not range-checked. Returns *this.                       */
+inline Transform3 & Transform3::setCol( int col, Vector3 vec )
+{
+    Vector3 * const columns = &mCol0;
+    columns[col] = vec;
+    return *this;
+}
+
+/* Replaces row `row`: element i of vec is written into element `row` */
+/* of column i, for i = 0..3. Returns *this.                          */
+inline Transform3 & Transform3::setRow( int row, Vector4 vec )
+{
+    for ( int col = 0; col < 4; ++col )
+    {
+        ( *this )[col].setElem( row, vec.getElem( col ) );
+    }
+    return *this;
+}
+
+/* Writes val into element `row` of column `col`. Returns *this. */
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    Vector3 & column = ( *this )[col];
+    column.setElem( row, val );
+    return *this;
+}
+
+/* Returns element `row` of column `col`. */
+inline float Transform3::getElem( int col, int row ) const
+{
+    const Vector3 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+/* Returns a copy of column 0. */
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+/* Returns a copy of column 1. */
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+/* Returns a copy of column 2. */
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+/* Returns a copy of column 3. */
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+/* Returns a copy of the column at index col, addressed as an offset */
+/* from mCol0. The index is not range-checked.                       */
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    const Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+/* Returns row `row` as a Vector4 made of element `row` of columns 0..3. */
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    return Vector4(
+        mCol0.getElem( row ),
+        mCol1.getElem( row ),
+        mCol2.getElem( row ),
+        mCol3.getElem( row )
+    );
+}
+
+/* Returns a mutable reference to the column at index col, addressed */
+/* as an offset from mCol0. The index is not range-checked.          */
+inline Vector3 & Transform3::operator []( int col )
+{
+    Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+/* Returns a copy of the column at index col, addressed as an offset */
+/* from mCol0. The index is not range-checked.                       */
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    const Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+/* Assignment: copies all four columns of tfrm. Returns *this. */
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+    return *this;
+}
+
+/* Computes the inverse of a general 3x4 transform.                       */
+/* The cross products of the column pairs give the (unscaled) rows of the */
+/* inverse 3x3; dot3( cross(col0,col1), col2 ) is its determinant, and    */
+/* every result column is multiplied by recipf4( determinant ). The new   */
+/* translation is the inverse 3x3 applied to the negated column 3.        */
+/* No check is made for a zero determinant.                               */
+/* NOTE(review): the row/column rearrangement relies on the               */
+/* _VECTORMATH_SHUF_* patterns defined elsewhere -- confirm lane order.   */
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
+    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
+    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
+    /* start from the negated translation */
+    inv3 = negatef4( tfrm.getCol3().get128() );
+    /* determinant, replicated to all lanes, then its reciprocal */
+    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
+    dot = spu_shuffle( dot, dot, shuffle_xxxx );
+    invdet = recipf4( dot );
+    /* rearrange the three cross products into the columns of the result */
+    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
+    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
+    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
+    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
+    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
+    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
+    /* inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z) */
+    inv3 = spu_mul( inv0, xxxx );
+    inv3 = spu_madd( inv1, yyyy, inv3 );
+    inv3 = spu_madd( inv2, zzzz, inv3 );
+    /* scale everything by 1/determinant */
+    inv0 = spu_mul( inv0, invdet );
+    inv1 = spu_mul( inv1, invdet );
+    inv2 = spu_mul( inv2, invdet );
+    inv3 = spu_mul( inv3, invdet );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+/* Computes the inverse of a transform without computing a determinant:   */
+/* columns 0..2 are rearranged by shuffles only (no scaling), and the new */
+/* translation is that rearranged 3x3 applied to the negated column 3.    */
+/* The name implies the upper 3x3 is expected to be orthonormal; this is  */
+/* not checked here.                                                      */
+/* NOTE(review): the shuffles presumably form the transpose of the upper  */
+/* 3x3; this depends on the _VECTORMATH_SHUF_* patterns defined           */
+/* elsewhere -- confirm.                                                  */
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    tmp0 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
+    /* start from the negated translation */
+    inv3 = negatef4( tfrm.getCol3().get128() );
+    inv0 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
+    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
+    inv1 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp1, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
+    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
+    /* inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z) */
+    inv3 = spu_mul( inv0, xxxx );
+    inv3 = spu_madd( inv1, yyyy, inv3 );
+    inv3 = spu_madd( inv2, zzzz, inv3 );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+/* Applies the Vector3 absPerElem to each of the four columns of tfrm */
+/* and returns the resulting transform.                               */
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+/* Transform * 3-D vector: returns col0*x + col1*y + col2*z.        */
+/* Column 3 (the translation) does not contribute to the result.    */
+inline const Vector3 Transform3::operator *( Vector3 vec ) const
+{
+    /* shuffle patterns that copy one 32-bit word of the source into all four words */
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 vx = spu_shuffle( vec.get128(), vec.get128(), splatX );
+    vec_float4 vy = spu_shuffle( vec.get128(), vec.get128(), splatY );
+    vec_float4 vz = spu_shuffle( vec.get128(), vec.get128(), splatZ );
+    /* single accumulation chain: ((col0*x) + col1*y) + col2*z */
+    vec_float4 acc = spu_madd( mCol1.get128(), vy, spu_mul( mCol0.get128(), vx ) );
+    return Vector3( spu_madd( mCol2.get128(), vz, acc ) );
+}
+
+/* Transform * 3-D point: returns col0*x + col1*y + col2*z + col3, */
+/* i.e. the translation column is added unscaled.                  */
+inline const Point3 Transform3::operator *( Point3 pnt ) const
+{
+    /* shuffle patterns that copy one 32-bit word of the source into all four words */
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 px = spu_shuffle( pnt.get128(), pnt.get128(), splatX );
+    vec_float4 py = spu_shuffle( pnt.get128(), pnt.get128(), splatY );
+    vec_float4 pz = spu_shuffle( pnt.get128(), pnt.get128(), splatZ );
+    /* two independent chains: (col0*x + col2*z) and (col3 + col1*y) */
+    vec_float4 accXZ = spu_madd( mCol2.get128(), pz, spu_mul( mCol0.get128(), px ) );
+    vec_float4 accYT = spu_add( mCol3.get128(), spu_mul( mCol1.get128(), py ) );
+    return Point3( spu_add( accXZ, accYT ) );
+}
+
+/* Transform * transform: columns 0..2 of tfrm are multiplied as 3-D    */
+/* vectors, and column 3 as a 3-D point (so this transform's own        */
+/* translation is added only to the last result column).                */
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Vector3 prod0 = *this * tfrm.mCol0;
+    const Vector3 prod1 = *this * tfrm.mCol1;
+    const Vector3 prod2 = *this * tfrm.mCol2;
+    const Point3 movedOrigin = *this * Point3( tfrm.mCol3 );
+    return Transform3( prod0, prod1, prod2, Vector3( movedOrigin ) );
+}
+
+/* In-place multiply: replaces this transform with ( *this * tfrm ). */
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    const Transform3 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+/* Applies the Vector3 mulPerElem to each pair of corresponding columns */
+/* of tfrm0 and tfrm1 and returns the resulting transform.              */
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+/* Returns the transform whose columns 0..2 are the x, y, z axis */
+/* vectors and whose column 3 is Vector3( 0.0f ).                */
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 noTranslation = Vector3( 0.0f );
+    return Transform3( Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ), noTranslation );
+}
+
+/* Replaces columns 0..2 with the columns of tfrm. Column 3 is not */
+/* touched. Returns *this.                                         */
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    this->mCol0 = tfrm.getCol0();
+    this->mCol1 = tfrm.getCol1();
+    this->mCol2 = tfrm.getCol2();
+    return *this;
+}
+
+/* Returns a Matrix3 built from columns 0..2. */
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    const Matrix3 upper = Matrix3( mCol0, mCol1, mCol2 );
+    return upper;
+}
+
+/* Replaces column 3 with translateVec. Returns *this. */
+inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
+{
+    this->mCol3 = translateVec;
+    return *this;
+}
+
+/* Returns a copy of column 3. */
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->mCol3;
+}
+
+/* Builds a rotation about the x axis from an angle passed to sincosf4.  */
+/* Column 1 is (0, cos, sin), column 2 is (0, -sin, cos); column 0 is    */
+/* the x axis vector and column 3 is Vector3( 0.0f ).                    */
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col1 = spu_sel( spu_sel( zeros, cosA, maskY ), sinA, maskZ );
+    vec_float4 col2 = spu_sel( spu_sel( zeros, negatef4(sinA), maskY ), cosA, maskZ );
+    return Transform3( Vector3::xAxis( ), Vector3( col1 ), Vector3( col2 ), Vector3( 0.0f ) );
+}
+
+/* Builds a rotation about the y axis from an angle passed to sincosf4.  */
+/* Column 0 is (cos, 0, -sin), column 2 is (sin, 0, cos); column 1 is    */
+/* the y axis vector and column 3 is Vector3( 0.0f ).                    */
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col0 = spu_sel( spu_sel( zeros, cosA, maskX ), negatef4(sinA), maskZ );
+    vec_float4 col2 = spu_sel( spu_sel( zeros, sinA, maskX ), cosA, maskZ );
+    return Transform3( Vector3( col0 ), Vector3::yAxis( ), Vector3( col2 ), Vector3( 0.0f ) );
+}
+
+/* Builds a rotation about the z axis from an angle passed to sincosf4.  */
+/* Column 0 is (cos, sin, 0), column 1 is (-sin, cos, 0); column 2 is    */
+/* the z axis vector and column 3 is Vector3( 0.0f ).                    */
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col0 = spu_sel( spu_sel( zeros, cosA, maskX ), sinA, maskY );
+    vec_float4 col1 = spu_sel( spu_sel( zeros, negatef4(sinA), maskX ), cosA, maskY );
+    return Transform3( Vector3( col0 ), Vector3( col1 ), Vector3::zAxis( ), Vector3( 0.0f ) );
+}
+
+/* Builds a rotation transform from three angles packed in radiansXYZ     */
+/* (one angle per lane; sin and cos of all of them are taken at once).    */
+/* NOTE(review): the per-lane layout of Z0/Z1/Y0/Y1 depends on the        */
+/* _VECTORMATH_SHUF_CZD0 / _VECTORMATH_SHUF_BBY0 patterns, which are      */
+/* defined outside this section -- confirm against their definitions.     */
+/* The translation column of the result is Vector3( 0.0f ).               */
+inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    angles = radiansXYZ.get128();
+    /* force lane 3 to 0 so that lane holds sin = 0 and cos = 1 after sincosf4 */
+    angles = spu_insert( 0.0f, angles, 3 );
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
+    /* sin and cos of the x angle, replicated to every lane */
+    X0 = spu_shuffle( s, s, shuffle_xxxx );
+    X1 = spu_shuffle( c, c, shuffle_xxxx );
+    tmp = spu_mul( Z0, Y1 );
+    return Transform3(
+        Vector3( spu_mul( Z0, Y0 ) ),
+        /* Z1*X1 + tmp*X0 */
+        Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
+        /* tmp*X1 - Z1*X0 */
+        Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
+        Vector3( 0.0f )
+    );
+}
+
+/* Builds a rotation transform from Matrix3::rotation( radians, unitVec ) */
+/* with a translation of Vector3( 0.0f ).                                 */
+inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
+{
+    const Matrix3 rotationPart = Matrix3::rotation( radians, unitVec );
+    return Transform3( rotationPart, Vector3( 0.0f ) );
+}
+
+/* Builds a rotation transform from Matrix3( unitQuat ) with a */
+/* translation of Vector3( 0.0f ).                             */
+inline const Transform3 Transform3::rotation( Quat unitQuat )
+{
+    const Matrix3 rotationPart = Matrix3( unitQuat );
+    return Transform3( rotationPart, Vector3( 0.0f ) );
+}
+
+/* Builds a scale transform: column i (i = 0..2) holds lane i of scaleVec */
+/* in lane i and zero elsewhere; column 3 is Vector3( 0.0f ).             */
+inline const Transform3 Transform3::scale( Vector3 scaleVec )
+{
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 col0 = spu_sel( zeros, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) );
+    vec_float4 col1 = spu_sel( zeros, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) );
+    vec_float4 col2 = spu_sel( zeros, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) );
+    return Transform3( Vector3( col0 ), Vector3( col1 ), Vector3( col2 ), Vector3( 0.0f ) );
+}
+
+/* Scales columns 0, 1 and 2 of tfrm by the x, y and z elements of */
+/* scaleVec respectively; column 3 is copied unchanged.            */
+inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
+{
+    const Vector3 scaled0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = tfrm.getCol2() * scaleVec.getZ( );
+    return Transform3( scaled0, scaled1, scaled2, tfrm.getCol3() );
+}
+
+/* Multiplies every column of tfrm (including column 3) element-wise */
+/* by scaleVec.                                                      */
+inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 scaled0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 scaled3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( scaled0, scaled1, scaled2, scaled3 );
+}
+
+/* Builds a translation transform: columns 0..2 are the x, y, z axis */
+/* vectors and column 3 is translateVec.                             */
+inline const Transform3 Transform3::translation( Vector3 translateVec )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Transform3( axisX, axisY, axisZ, translateVec );
+}
+
+/* Column-wise select: applies the Vector3 select to each pair of   */
+/* corresponding columns of tfrm0 and tfrm1 with the same select1.  */
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    const Vector3 picked0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 picked1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 picked2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 picked3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Debug helper: prints rows 0..2 of tfrm, in order, using the Vector4 print. */
+inline void print( const Transform3 & tfrm )
+{
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( tfrm.getRow( row ) );
+    }
+}
+
+/* Debug helper: prints name followed by ':' and a newline, then the */
+/* transform itself via print( tfrm ).                               */
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    printf( "%s:\n", name );
+    print( tfrm );
+}
+
+#endif
+
+/* Constructs a quaternion from a 3x3 matrix (named tfrm here).           */
+/* All four candidate results are computed without branching and the      */
+/* final value is chosen with spu_sel based on the diagonal elements:     */
+/* res3 when the first lane of diagSum is > 0, otherwise res2 when zz is  */
+/* greater than both xx and yy, otherwise res1 when yy > xx, else res0.   */
+/* NOTE(review): the lane layouts of the intermediate vectors depend on   */
+/* the _VECTORMATH_SHUF_* patterns defined elsewhere; the comments below  */
+/* follow the variable names -- confirm against those definitions.        */
+inline Quat::Quat( const Matrix3 & tfrm )
+{
+    vec_float4 res;
+    vec_float4 col0, col1, col2;
+    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
+    vec_float4 radicand, invSqrt, scale;
+    vec_float4 res0, res1, res2, res3;
+    vec_float4 xx, yy, zz;
+    vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 );
+    vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 );
+    vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 );
+    vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f );
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b);
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f);
+
+    col0 = tfrm.getCol0().get128();
+    col1 = tfrm.getCol1().get128();
+    col2 = tfrm.getCol2().get128();
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    /* gather the diagonal elements into three rotated arrangements */
+    xx_yy = spu_sel( col0, col1, select_y );
+    xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
+    yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
+    zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
+
+    /* radicand: diagDiff + 1 in lanes 0..2, diagSum + 1 in lane 3 */
+    diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) );
+    invSqrt = rsqrtf4( radicand );
+
+    /* gather the off-diagonal elements and form their sums and differences */
+    zy_xz_yx = spu_sel( col0, col1, select_z );
+    zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
+    yz_zx_xy = spu_sel( col0, col1, select_x );
+    yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
+
+    sum = spu_add( zy_xz_yx, yz_zx_xy );
+    diff = spu_sub( zy_xz_yx, yz_zx_xy );
+
+    /* scale = 0.5 / sqrt( radicand ), per lane */
+    scale = spu_mul( invSqrt, spu_splats(0.5f) );
+    res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
+    res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
+    res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
+    res3 = diff;
+    /* candidate i takes radicand in lane i, then is scaled by lane i of scale */
+    res0 = spu_sel( res0, radicand, select_x );
+    res1 = spu_sel( res1, radicand, select_y );
+    res2 = spu_sel( res2, radicand, select_z );
+    res3 = spu_sel( res3, radicand, select_w );
+    res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) );
+    res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
+    res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
+    res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
+
+    /* determine case and select answer */
+
+    xx = spu_shuffle( col0, col0, shuffle_xxxx );
+    yy = spu_shuffle( col1, col1, shuffle_yyyy );
+    zz = spu_shuffle( col2, col2, shuffle_zzzz );
+    /* later selects override earlier ones, so the diagSum > 0 test has priority */
+    res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) );
+    res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) );
+    res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) );
+    mVec128 = res;
+}
+
+inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 )
+{
+    return Matrix3(
+        ( tfrm0 * tfrm1.getX( ) ),
+        ( tfrm0 * tfrm1.getY( ) ),
+        ( tfrm0 * tfrm1.getZ( ) )
+    );
+}
+
+inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 )
+{
+    return Matrix4(
+        ( tfrm0 * tfrm1.getX( ) ),
+        ( tfrm0 * tfrm1.getY( ) ),
+        ( tfrm0 * tfrm1.getZ( ) ),
+        ( tfrm0 * tfrm1.getW( ) )
+    );
+}
+
+inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat )
+{
+    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
+    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
+    mcol0 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
+    mcol1 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
+    mcol2 = spu_shuffle( tmp1, mat.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
+    res = spu_mul( mcol0, xxxx );
+    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
+    res = spu_madd( mcol1, yyyy, res );
+    res = spu_madd( mcol2, zzzz, res );
+    return Vector3( res );
+}
+
+inline const Matrix3 crossMatrix( Vector3 vec )
+{
+    vec_float4 neg, res0, res1, res2;
+    neg = negatef4( vec.get128() );
+    res0 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_0ZB0 );
+    res1 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_C0X0 );
+    res2 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_YA00 );
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat )
+{
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_soa.h
index ef43e7b00..8c884d593 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_soa.h
@@ -1,1744 +1,1744 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_CPP_H
-#define _VECTORMATH_MAT_SOA_CPP_H
-
-namespace Vectormath {
-namespace Soa {
-
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( vec_float4 scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const Quat & unitQuat )
-{
-    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat.getX();
-    qy = unitQuat.getY();
-    qz = unitQuat.getZ();
-    qw = unitQuat.getW();
-    qx2 = spu_add( qx, qx );
-    qy2 = spu_add( qy, qy );
-    qz2 = spu_add( qz, qz );
-    qxqx2 = spu_mul( qx, qx2 );
-    qxqy2 = spu_mul( qx, qy2 );
-    qxqz2 = spu_mul( qx, qz2 );
-    qxqw2 = spu_mul( qw, qx2 );
-    qyqy2 = spu_mul( qy, qy2 );
-    qyqz2 = spu_mul( qy, qz2 );
-    qyqw2 = spu_mul( qw, qy2 );
-    qzqz2 = spu_mul( qz, qz2 );
-    qzqw2 = spu_mul( qw, qz2 );
-    mCol0 = Vector3( spu_sub( spu_sub( spu_splats(1.0f), qyqy2 ), qzqz2 ), spu_add( qxqy2, qzqw2 ), spu_sub( qxqz2, qyqw2 ) );
-    mCol1 = Vector3( spu_sub( qxqy2, qzqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qzqz2 ), spu_add( qyqz2, qxqw2 ) );
-    mCol2 = Vector3( spu_add( qxqz2, qyqw2 ), spu_sub( qyqz2, qxqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qyqy2 ) );
-}
-
-inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3::Matrix3( const Aos::Matrix3 & mat )
-{
-    mCol0 = Vector3( mat.getCol0() );
-    mCol1 = Vector3( mat.getCol1() );
-    mCol2 = Vector3( mat.getCol2() );
-}
-
-inline Matrix3::Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 )
-{
-    mCol0 = Vector3( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
-    mCol1 = Vector3( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
-    mCol2 = Vector3( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
-}
-
-inline void Matrix3::get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const
-{
-    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol0( tmpV3_0 );
-    result1.setCol0( tmpV3_1 );
-    result2.setCol0( tmpV3_2 );
-    result3.setCol0( tmpV3_3 );
-    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol1( tmpV3_0 );
-    result1.setCol1( tmpV3_1 );
-    result2.setCol1( tmpV3_2 );
-    result3.setCol1( tmpV3_3 );
-    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol2( tmpV3_0 );
-    result1.setCol2( tmpV3_1 );
-    result2.setCol2( tmpV3_2 );
-    result3.setCol2( tmpV3_3 );
-}
-
-inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, vec_float4 val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    return Matrix3(
-        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
-        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
-        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    Vector3 tmp0, tmp1, tmp2;
-    vec_float4 detinv;
-    tmp0 = cross( mat.getCol1(), mat.getCol2() );
-    tmp1 = cross( mat.getCol2(), mat.getCol0() );
-    tmp2 = cross( mat.getCol0(), mat.getCol1() );
-    detinv = recipf4( dot( mat.getCol2(), tmp2 ) );
-    return Matrix3(
-        Vector3( spu_mul( tmp0.getX(), detinv ), spu_mul( tmp1.getX(), detinv ), spu_mul( tmp2.getX(), detinv ) ),
-        Vector3( spu_mul( tmp0.getY(), detinv ), spu_mul( tmp1.getY(), detinv ), spu_mul( tmp2.getY(), detinv ) ),
-        Vector3( spu_mul( tmp0.getZ(), detinv ), spu_mul( tmp1.getZ(), detinv ), spu_mul( tmp2.getZ(), detinv ) )
-    );
-}
-
-inline vec_float4 determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( vec_float4 scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( spu_splats(0.0f), c, s ),
-        Vector3( spu_splats(0.0f), negatef4( s ), c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3( c, spu_splats(0.0f), negatef4( s ) ),
-        Vector3::yAxis( ),
-        Vector3( s, spu_splats(0.0f), c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3( c, s, spu_splats(0.0f) ),
-        Vector3( negatef4( s ), c, spu_splats(0.0f) ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    return Matrix3(
-        Vector3( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) ),
-        Vector3( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) ),
-        Vector3( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = spu_mul( x, y );
-    yz = spu_mul( y, z );
-    zx = spu_mul( z, x );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    return Matrix3(
-        Vector3( spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ) ),
-        Vector3( spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ) ),
-        Vector3( spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
-{
-    return Matrix3(
-        Vector3( scaleVec.getX(), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector3( spu_splats(0.0f), scaleVec.getY(), spu_splats(0.0f) ),
-        Vector3( spu_splats(0.0f), spu_splats(0.0f), scaleVec.getZ() )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    Aos::Matrix3 mat0, mat1, mat2, mat3;
-    mat.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( vec_float4 scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), spu_splats(0.0f) );
-    mCol1 = Vector4( mat.getCol1(), spu_splats(0.0f) );
-    mCol2 = Vector4( mat.getCol2(), spu_splats(0.0f) );
-    mCol3 = Vector4( mat.getCol3(), spu_splats(1.0f) );
-}
-
-inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), spu_splats(0.0f) );
-    mCol1 = Vector4( mat.getCol1(), spu_splats(0.0f) );
-    mCol2 = Vector4( mat.getCol2(), spu_splats(0.0f) );
-    mCol3 = Vector4( translateVec, spu_splats(1.0f) );
-}
-
-inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), spu_splats(0.0f) );
-    mCol1 = Vector4( mat.getCol1(), spu_splats(0.0f) );
-    mCol2 = Vector4( mat.getCol2(), spu_splats(0.0f) );
-    mCol3 = Vector4( translateVec, spu_splats(1.0f) );
-}
-
-inline Matrix4::Matrix4( const Aos::Matrix4 & mat )
-{
-    mCol0 = Vector4( mat.getCol0() );
-    mCol1 = Vector4( mat.getCol1() );
-    mCol2 = Vector4( mat.getCol2() );
-    mCol3 = Vector4( mat.getCol3() );
-}
-
-inline Matrix4::Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 )
-{
-    mCol0 = Vector4( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
-    mCol1 = Vector4( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
-    mCol2 = Vector4( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
-    mCol3 = Vector4( mat0.getCol3(), mat1.getCol3(), mat2.getCol3(), mat3.getCol3() );
-}
-
-inline void Matrix4::get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const
-{
-    Aos::Vector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    mCol0.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol0( tmpV4_0 );
-    result1.setCol0( tmpV4_1 );
-    result2.setCol0( tmpV4_2 );
-    result3.setCol0( tmpV4_3 );
-    mCol1.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol1( tmpV4_0 );
-    result1.setCol1( tmpV4_1 );
-    result2.setCol1( tmpV4_2 );
-    result3.setCol1( tmpV4_3 );
-    mCol2.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol2( tmpV4_0 );
-    result1.setCol2( tmpV4_1 );
-    result2.setCol2( tmpV4_2 );
-    result3.setCol2( tmpV4_3 );
-    mCol3.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol3( tmpV4_0 );
-    result1.setCol3( tmpV4_1 );
-    result2.setCol3( tmpV4_2 );
-    result3.setCol3( tmpV4_3 );
-}
-
-inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, vec_float4 val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    return Matrix4(
-        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
-        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
-        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
-        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    Vector4 res0, res1, res2, res3;
-    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
-    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
-    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
-    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
-    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
-    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
-    res0.setX( spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) ) );
-    res0.setY( spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) ) );
-    res0.setZ( spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) ) );
-    res0.setW( spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) ) );
-    detInv = recipf4( spu_add( spu_add( spu_add( spu_mul( mA, res0.getX() ), spu_mul( mE, res0.getY() ) ), spu_mul( mI, res0.getZ() ) ), spu_mul( mM, res0.getW() ) ) );
-    res1.setX( spu_mul( mI, tmp1 ) );
-    res1.setY( spu_mul( mM, tmp0 ) );
-    res1.setZ( spu_mul( mA, tmp1 ) );
-    res1.setW( spu_mul( mE, tmp0 ) );
-    res3.setX( spu_mul( mI, tmp3 ) );
-    res3.setY( spu_mul( mM, tmp2 ) );
-    res3.setZ( spu_mul( mA, tmp3 ) );
-    res3.setW( spu_mul( mE, tmp2 ) );
-    res2.setX( spu_mul( mI, tmp5 ) );
-    res2.setY( spu_mul( mM, tmp4 ) );
-    res2.setZ( spu_mul( mA, tmp5 ) );
-    res2.setW( spu_mul( mE, tmp4 ) );
-    tmp0 = spu_sub( spu_mul( mI, mB ), spu_mul( mA, mJ ) );
-    tmp1 = spu_sub( spu_mul( mM, mF ), spu_mul( mE, mN ) );
-    tmp2 = spu_sub( spu_mul( mI, mD ), spu_mul( mA, mL ) );
-    tmp3 = spu_sub( spu_mul( mM, mH ), spu_mul( mE, mP ) );
-    tmp4 = spu_sub( spu_mul( mI, mC ), spu_mul( mA, mK ) );
-    tmp5 = spu_sub( spu_mul( mM, mG ), spu_mul( mE, mO ) );
-    res2.setX( spu_add( spu_sub( spu_mul( mL, tmp1 ), spu_mul( mJ, tmp3 ) ), res2.getX() ) );
-    res2.setY( spu_add( spu_sub( spu_mul( mP, tmp0 ), spu_mul( mN, tmp2 ) ), res2.getY() ) );
-    res2.setZ( spu_sub( spu_sub( spu_mul( mB, tmp3 ), spu_mul( mD, tmp1 ) ), res2.getZ() ) );
-    res2.setW( spu_sub( spu_sub( spu_mul( mF, tmp2 ), spu_mul( mH, tmp0 ) ), res2.getW() ) );
-    res3.setX( spu_add( spu_sub( spu_mul( mJ, tmp5 ), spu_mul( mK, tmp1 ) ), res3.getX() ) );
-    res3.setY( spu_add( spu_sub( spu_mul( mN, tmp4 ), spu_mul( mO, tmp0 ) ), res3.getY() ) );
-    res3.setZ( spu_sub( spu_sub( spu_mul( mC, tmp1 ), spu_mul( mB, tmp5 ) ), res3.getZ() ) );
-    res3.setW( spu_sub( spu_sub( spu_mul( mG, tmp0 ), spu_mul( mF, tmp4 ) ), res3.getW() ) );
-    res1.setX( spu_sub( spu_sub( spu_mul( mK, tmp3 ), spu_mul( mL, tmp5 ) ), res1.getX() ) );
-    res1.setY( spu_sub( spu_sub( spu_mul( mO, tmp2 ), spu_mul( mP, tmp4 ) ), res1.getY() ) );
-    res1.setZ( spu_add( spu_sub( spu_mul( mD, tmp5 ), spu_mul( mC, tmp3 ) ), res1.getZ() ) );
-    res1.setW( spu_add( spu_sub( spu_mul( mH, tmp4 ), spu_mul( mG, tmp2 ) ), res1.getW() ) );
-    return Matrix4(
-        ( res0 * detInv ),
-        ( res1 * detInv ),
-        ( res2 * detInv ),
-        ( res3 * detInv )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline vec_float4 determinant( const Matrix4 & mat )
-{
-    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
-    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
-    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
-    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
-    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
-    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
-    dx = spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) );
-    dy = spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) );
-    dz = spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) );
-    dw = spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) );
-    return spu_add( spu_add( spu_add( spu_mul( mA, dx ), spu_mul( mE, dy ) ), spu_mul( mI, dz ) ), spu_mul( mM, dw ) );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( vec_float4 scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
-{
-    return Vector4(
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ), spu_mul( mCol3.getX(), vec.getW() ) ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ), spu_mul( mCol3.getY(), vec.getW() ) ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) ), spu_mul( mCol3.getZ(), vec.getW() ) ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getW(), vec.getX() ), spu_mul( mCol1.getW(), vec.getY() ) ), spu_mul( mCol2.getW(), vec.getZ() ) ), spu_mul( mCol3.getW(), vec.getW() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
-{
-    return Vector4(
-        spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getW(), vec.getX() ), spu_mul( mCol1.getW(), vec.getY() ) ), spu_mul( mCol2.getW(), vec.getZ() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
-{
-    return Vector4(
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), pnt.getX() ), spu_mul( mCol1.getX(), pnt.getY() ) ), spu_mul( mCol2.getX(), pnt.getZ() ) ), mCol3.getX() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), pnt.getX() ), spu_mul( mCol1.getY(), pnt.getY() ) ), spu_mul( mCol2.getY(), pnt.getZ() ) ), mCol3.getY() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), pnt.getX() ), spu_mul( mCol1.getZ(), pnt.getY() ) ), spu_mul( mCol2.getZ(), pnt.getZ() ) ), mCol3.getZ() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getW(), pnt.getX() ), spu_mul( mCol1.getW(), pnt.getY() ) ), spu_mul( mCol2.getW(), pnt.getZ() ) ), mCol3.getW() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( spu_splats(0.0f), c, s, spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), negatef4( s ), c, spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4( c, spu_splats(0.0f), negatef4( s ), spu_splats(0.0f) ),
-        Vector4::yAxis( ),
-        Vector4( s, spu_splats(0.0f), c, spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4( c, s, spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( negatef4( s ), c, spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    return Matrix4(
-        Vector4( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ), spu_splats(0.0f) ),
-        Vector4( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ), spu_splats(0.0f) ),
-        Vector4( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ), spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = spu_mul( x, y );
-    yz = spu_mul( y, z );
-    zx = spu_mul( z, x );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    return Matrix4(
-        Vector4( spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_splats(0.0f) ),
-        Vector4( spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_splats(0.0f) ),
-        Vector4( spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ), spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
-{
-    return Matrix4(
-        Vector4( scaleVec.getX(), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), scaleVec.getY(), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), scaleVec.getZ(), spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, spu_splats(1.0f) );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, spu_splats(1.0f) )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 f, rangeInv;
-    f = tanf4( spu_sub( spu_splats( _VECTORMATH_PI_OVER_2 ), spu_mul( spu_splats(0.5f), fovyRadians ) ) );
-    rangeInv = recipf4( spu_sub( zNear, zFar ) );
-    return Matrix4(
-        Vector4( divf4( f, aspect ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), f, spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_add( zNear, zFar ), rangeInv ), spu_splats(-1.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( spu_mul( zNear, zFar ), rangeInv ), spu_splats(2.0f) ), spu_splats(0.0f) )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = spu_add( right, left );
-    sum_tb = spu_add( top, bottom );
-    sum_nf = spu_add( zNear, zFar );
-    inv_rl = recipf4( spu_sub( right, left ) );
-    inv_tb = recipf4( spu_sub( top, bottom ) );
-    inv_nf = recipf4( spu_sub( zNear, zFar ) );
-    n2 = spu_add( zNear, zNear );
-    return Matrix4(
-        Vector4( spu_mul( n2, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_mul( n2, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_mul( sum_rl, inv_rl ), spu_mul( sum_tb, inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(-1.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( n2, inv_nf ), zFar ), spu_splats(0.0f) )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = spu_add( right, left );
-    sum_tb = spu_add( top, bottom );
-    sum_nf = spu_add( zNear, zFar );
-    inv_rl = recipf4( spu_sub( right, left ) );
-    inv_tb = recipf4( spu_sub( top, bottom ) );
-    inv_nf = recipf4( spu_sub( zNear, zFar ) );
-    return Matrix4(
-        Vector4( spu_add( inv_rl, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_add( inv_tb, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_add( inv_nf, inv_nf ), spu_splats(0.0f) ),
-        Vector4( spu_mul( negatef4( sum_rl ), inv_rl ), spu_mul( negatef4( sum_tb ), inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(1.0f) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    Aos::Matrix4 mat0, mat1, mat2, mat3;
-    mat.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( vec_float4 scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Aos::Transform3 & tfrm )
-{
-    mCol0 = Vector3( tfrm.getCol0() );
-    mCol1 = Vector3( tfrm.getCol1() );
-    mCol2 = Vector3( tfrm.getCol2() );
-    mCol3 = Vector3( tfrm.getCol3() );
-}
-
-inline Transform3::Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 )
-{
-    mCol0 = Vector3( tfrm0.getCol0(), tfrm1.getCol0(), tfrm2.getCol0(), tfrm3.getCol0() );
-    mCol1 = Vector3( tfrm0.getCol1(), tfrm1.getCol1(), tfrm2.getCol1(), tfrm3.getCol1() );
-    mCol2 = Vector3( tfrm0.getCol2(), tfrm1.getCol2(), tfrm2.getCol2(), tfrm3.getCol2() );
-    mCol3 = Vector3( tfrm0.getCol3(), tfrm1.getCol3(), tfrm2.getCol3(), tfrm3.getCol3() );
-}
-
-inline void Transform3::get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const
-{
-    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol0( tmpV3_0 );
-    result1.setCol0( tmpV3_1 );
-    result2.setCol0( tmpV3_2 );
-    result3.setCol0( tmpV3_3 );
-    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol1( tmpV3_0 );
-    result1.setCol1( tmpV3_1 );
-    result2.setCol1( tmpV3_2 );
-    result3.setCol1( tmpV3_3 );
-    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol2( tmpV3_0 );
-    result1.setCol2( tmpV3_1 );
-    result2.setCol2( tmpV3_2 );
-    result3.setCol2( tmpV3_3 );
-    mCol3.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol3( tmpV3_0 );
-    result1.setCol3( tmpV3_1 );
-    result2.setCol3( tmpV3_2 );
-    result3.setCol3( tmpV3_3 );
-}
-
-inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, vec_float4 val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
-    vec_float4 detinv;
-    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );
-    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
-    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
-    detinv = recipf4( dot( tfrm.getCol2(), tmp2 ) );
-    inv0 = Vector3( spu_mul( tmp0.getX(), detinv ), spu_mul( tmp1.getX(), detinv ), spu_mul( tmp2.getX(), detinv ) );
-    inv1 = Vector3( spu_mul( tmp0.getY(), detinv ), spu_mul( tmp1.getY(), detinv ), spu_mul( tmp2.getY(), detinv ) );
-    inv2 = Vector3( spu_mul( tmp0.getZ(), detinv ), spu_mul( tmp1.getZ(), detinv ), spu_mul( tmp2.getZ(), detinv ) );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    Vector3 inv0, inv1, inv2;
-    inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );
-    inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );
-    inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) )
-    );
-}
-
-inline const Point3 Transform3::operator *( const Point3 & pnt ) const
-{
-    return Point3(
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), pnt.getX() ), spu_mul( mCol1.getX(), pnt.getY() ) ), spu_mul( mCol2.getX(), pnt.getZ() ) ), mCol3.getX() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), pnt.getX() ), spu_mul( mCol1.getY(), pnt.getY() ) ), spu_mul( mCol2.getY(), pnt.getZ() ) ), mCol3.getY() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), pnt.getX() ), spu_mul( mCol1.getZ(), pnt.getY() ) ), spu_mul( mCol2.getZ(), pnt.getZ() ) ), mCol3.getZ() )
-    );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( spu_splats(0.0f), c, s ),
-        Vector3( spu_splats(0.0f), negatef4( s ), c ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3( c, spu_splats(0.0f), negatef4( s ) ),
-        Vector3::yAxis( ),
-        Vector3( s, spu_splats(0.0f), c ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3( c, s, spu_splats(0.0f) ),
-        Vector3( negatef4( s ), c, spu_splats(0.0f) ),
-        Vector3::zAxis( ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    return Transform3(
-        Vector3( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) ),
-        Vector3( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) ),
-        Vector3( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 Transform3::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( spu_splats(0.0f) ) );
-}
-
-inline const Transform3 Transform3::rotation( const Quat & unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( spu_splats(0.0f) ) );
-}
-
-inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
-{
-    return Transform3(
-        Vector3( scaleVec.getX(), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector3( spu_splats(0.0f), scaleVec.getY(), spu_splats(0.0f) ),
-        Vector3( spu_splats(0.0f), spu_splats(0.0f), scaleVec.getZ() ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( const Vector3 & translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    Aos::Transform3 mat0, mat1, mat2, mat3;
-    tfrm.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
-    vec_uint4 largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm.getCol0().getX();
-    yx = tfrm.getCol0().getY();
-    zx = tfrm.getCol0().getZ();
-    xy = tfrm.getCol1().getX();
-    yy = tfrm.getCol1().getY();
-    zy = tfrm.getCol1().getZ();
-    xz = tfrm.getCol2().getX();
-    yz = tfrm.getCol2().getY();
-    zz = tfrm.getCol2().getZ();
-
-    trace = spu_add( spu_add( xx, yy ), zz );
-
-    negTrace = spu_cmpgt( spu_splats(0.0f), trace );
-    ZgtX = spu_cmpgt( zz, xx );
-    ZgtY = spu_cmpgt( zz, yy );
-    YgtX = spu_cmpgt( yy, xx );
-    largestXorY = spu_and( negTrace, spu_nand( ZgtX, ZgtY ) );
-    largestYorZ = spu_and( negTrace, spu_or( YgtX, ZgtX ) );
-    largestZorX = spu_and( negTrace, spu_orc( ZgtY, YgtX ) );
-    
-    zz = spu_sel( zz, negatef4(zz), largestXorY );
-    xy = spu_sel( xy, negatef4(xy), largestXorY );
-    xx = spu_sel( xx, negatef4(xx), largestYorZ );
-    yz = spu_sel( yz, negatef4(yz), largestYorZ );
-    yy = spu_sel( yy, negatef4(yy), largestZorX );
-    zx = spu_sel( zx, negatef4(zx), largestZorX );
-
-    radicand = spu_add( spu_add( spu_add( xx, yy ), zz ), spu_splats(1.0f) );
-    scale = spu_mul( spu_splats(0.5f), rsqrtf4( radicand ) );
-
-    tmpx = spu_mul( spu_sub( zy, yz ), scale );
-    tmpy = spu_mul( spu_sub( xz, zx ), scale );
-    tmpz = spu_mul( spu_sub( yx, xy ), scale );
-    tmpw = spu_mul( radicand, scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    qx = spu_sel( qx, tmpw, largestXorY );
-    qy = spu_sel( qy, tmpz, largestXorY );
-    qz = spu_sel( qz, tmpy, largestXorY );
-    qw = spu_sel( qw, tmpx, largestXorY );
-    tmpx = qx;
-    tmpz = qz;
-    qx = spu_sel( qx, qy, largestYorZ );
-    qy = spu_sel( qy, tmpx, largestYorZ );
-    qz = spu_sel( qz, qw, largestYorZ );
-    qw = spu_sel( qw, tmpz, largestYorZ );
-
-    mX = qx;
-    mY = qy;
-    mZ = qz;
-    mW = qw;
-}
-
-inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Vector3(
-        spu_add( spu_add( spu_mul( vec.getX(), mat.getCol0().getX() ), spu_mul( vec.getY(), mat.getCol0().getY() ) ), spu_mul( vec.getZ(), mat.getCol0().getZ() ) ),
-        spu_add( spu_add( spu_mul( vec.getX(), mat.getCol1().getX() ), spu_mul( vec.getY(), mat.getCol1().getY() ) ), spu_mul( vec.getZ(), mat.getCol1().getZ() ) ),
-        spu_add( spu_add( spu_mul( vec.getX(), mat.getCol2().getX() ), spu_mul( vec.getY(), mat.getCol2().getY() ) ), spu_mul( vec.getZ(), mat.getCol2().getZ() ) )
-    );
-}
-
-inline const Matrix3 crossMatrix( const Vector3 & vec )
-{
-    return Matrix3(
-        Vector3( spu_splats(0.0f), vec.getZ(), negatef4( vec.getY() ) ),
-        Vector3( negatef4( vec.getZ() ), spu_splats(0.0f), vec.getX() ),
-        Vector3( vec.getY(), negatef4( vec.getX() ), spu_splats(0.0f) )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_CPP_H
+#define _VECTORMATH_MAT_SOA_CPP_H
+
+namespace Vectormath {
+namespace Soa {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )
+    : mCol0( mat.mCol0 ),   // column-by-column copy of the source matrix
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 )
+{
+}
+
+inline Matrix3::Matrix3( vec_float4 scalar )
+    : mCol0( scalar ),      // every column is built from the same scalar argument
+      mCol1( scalar ),
+      mCol2( scalar )
+{
+}
+
+inline Matrix3::Matrix3( const Quat & unitQuat )  // quaternion -> 3x3 matrix; the parameter name suggests unit length is expected (not checked here)
+{
+    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat.getX();
+    qy = unitQuat.getY();
+    qz = unitQuat.getZ();
+    qw = unitQuat.getW();
+    qx2 = spu_add( qx, qx );  // doubled components 2x, 2y, 2z
+    qy2 = spu_add( qy, qy );
+    qz2 = spu_add( qz, qz );
+    qxqx2 = spu_mul( qx, qx2 );  // from here on: pairwise products, each already carrying the factor 2
+    qxqy2 = spu_mul( qx, qy2 );
+    qxqz2 = spu_mul( qx, qz2 );
+    qxqw2 = spu_mul( qw, qx2 );
+    qyqy2 = spu_mul( qy, qy2 );
+    qyqz2 = spu_mul( qy, qz2 );
+    qyqw2 = spu_mul( qw, qy2 );
+    qzqz2 = spu_mul( qz, qz2 );
+    qzqw2 = spu_mul( qw, qz2 );
+    mCol0 = Vector3( spu_sub( spu_sub( spu_splats(1.0f), qyqy2 ), qzqz2 ), spu_add( qxqy2, qzqw2 ), spu_sub( qxqz2, qyqw2 ) );  // ( 1-2yy-2zz, 2xy+2zw, 2xz-2yw )
+    mCol1 = Vector3( spu_sub( qxqy2, qzqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qzqz2 ), spu_add( qyqz2, qxqw2 ) );  // ( 2xy-2zw, 1-2xx-2zz, 2yz+2xw )
+    mCol2 = Vector3( spu_add( qxqz2, qyqw2 ), spu_sub( qyqz2, qxqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qyqy2 ) );  // ( 2xz+2yw, 2yz-2xw, 1-2xx-2yy )
+}
+
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
+    : mCol0( _col0 ),       // store the three columns exactly as given
+      mCol1( _col1 ),
+      mCol2( _col2 )
+{
+}
+
+inline Matrix3::Matrix3( const Aos::Matrix3 & mat )
+    : mCol0( mat.getCol0() ),   // each SoA column is constructed from the matching AoS column
+      mCol1( mat.getCol1() ),
+      mCol2( mat.getCol2() )
+{
+}
+
+inline Matrix3::Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 )
+    : mCol0( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() ),   // column k gathers column k of all four inputs
+      mCol1( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() ),
+      mCol2( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() )
+{
+}
+
+inline void Matrix3::get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const
+{
+    Aos::Vector3 slot0, slot1, slot2, slot3;   // scratch: one AoS vector per output matrix, refilled for each column
+    mCol0.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol0( slot0 );
+    result1.setCol0( slot1 );
+    result2.setCol0( slot2 );
+    result3.setCol0( slot3 );
+    mCol1.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol1( slot0 );
+    result1.setCol1( slot1 );
+    result2.setCol1( slot2 );
+    result3.setCol1( slot3 );
+    mCol2.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol2( slot0 );
+    result1.setCol2( slot1 );
+    result2.setCol2( slot2 );
+    result3.setCol2( slot3 );
+}
+
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
+{
+    mCol0 = _col0;  // replace column 0
+    return *this;   // returning the matrix lets calls be chained
+}
+
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
+{
+    mCol1 = _col1;  // replace column 1
+    return *this;   // returning the matrix lets calls be chained
+}
+
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
+{
+    mCol2 = _col2;  // replace column 2
+    return *this;   // returning the matrix lets calls be chained
+}
+
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
+{
+    (&mCol0)[ col ] = vec;   // columns are addressed as an array starting at mCol0; col is not range-checked
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );  // element i of vec is written into position `row` of column i
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, vec_float4 val )
+{
+    // Read-modify-write of one column: fetch a copy, patch one element, store it back.
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+
+inline vec_float4 Matrix3::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );  // select the column first, then the element within it
+}
+
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return mCol0;  // returned by value: a copy of column 0
+}
+
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return mCol1;  // returned by value: a copy of column 1
+}
+
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return mCol2;  // returned by value: a copy of column 2
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const
+{
+    return (&mCol0)[ col ];   // columns are addressed as an array starting at mCol0; col is not range-checked
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );  // element `row` taken from each column in turn
+}
+
+inline Vector3 & Matrix3::operator []( int col )
+{
+    return (&mCol0)[ col ];   // mutable reference to the selected column; col is not range-checked
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const
+{
+    return (&mCol0)[ col ];   // const overload returns the selected column by value; col is not range-checked
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;  // plain column-by-column copy
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    const Vector3 c0 = mat.getCol0(), c1 = mat.getCol1(), c2 = mat.getCol2();
+    const Vector3 row0( c0.getX(), c1.getX(), c2.getX() );   // the rows of the input ...
+    const Vector3 row1( c0.getY(), c1.getY(), c2.getY() );
+    const Vector3 row2( c0.getZ(), c1.getZ(), c2.getZ() );
+    return Matrix3( row0, row1, row2 );                       // ... become the columns of the result
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat )
+{
+    // Pairwise cross products of the columns; they end up as the rows of the result.
+    const Vector3 c12 = cross( mat.getCol1(), mat.getCol2() );
+    const Vector3 c20 = cross( mat.getCol2(), mat.getCol0() );
+    const Vector3 c01 = cross( mat.getCol0(), mat.getCol1() );
+    // Reciprocal of col2 . ( col0 x col1 ); no check is made for a zero value.
+    const vec_float4 detinv = recipf4( dot( mat.getCol2(), c01 ) );
+    return Matrix3(
+        Vector3( spu_mul( c12.getX(), detinv ), spu_mul( c20.getX(), detinv ), spu_mul( c01.getX(), detinv ) ),
+        Vector3( spu_mul( c12.getY(), detinv ), spu_mul( c20.getY(), detinv ), spu_mul( c01.getY(), detinv ) ),
+        Vector3( spu_mul( c12.getZ(), detinv ), spu_mul( c20.getZ(), detinv ), spu_mul( c01.getZ(), detinv ) )
+    );
+}
+
+inline vec_float4 determinant( const Matrix3 & mat )
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );  // scalar triple product col2 . ( col0 x col1 )
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    // Column-wise sum.
+    const Vector3 sum0 = mCol0 + mat.mCol0;
+    const Vector3 sum1 = mCol1 + mat.mCol1;
+    const Vector3 sum2 = mCol2 + mat.mCol2;
+    return Matrix3( sum0, sum1, sum2 );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    // Column-wise difference.
+    const Vector3 diff0 = mCol0 - mat.mCol0;
+    const Vector3 diff1 = mCol1 - mat.mCol1;
+    const Vector3 diff2 = mCol2 - mat.mCol2;
+    return Matrix3( diff0, diff1, diff2 );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    *this = *this + mat;  // implemented via binary operator + followed by assignment
+    return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    *this = *this - mat;  // implemented via binary operator - followed by assignment
+    return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    // Negate each column.
+    const Vector3 neg0 = -mCol0;
+    const Vector3 neg1 = -mCol1;
+    const Vector3 neg2 = -mCol2;
+    return Matrix3( neg0, neg1, neg2 );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    // Delegates to the Vector3 overload, one column at a time.
+    const Vector3 abs0 = absPerElem( mat.getCol0() );
+    const Vector3 abs1 = absPerElem( mat.getCol1() );
+    const Vector3 abs2 = absPerElem( mat.getCol2() );
+    return Matrix3( abs0, abs1, abs2 );
+}
+
+inline const Matrix3 Matrix3::operator *( vec_float4 scalar ) const
+{
+    // Scale each column by the same scalar.
+    const Vector3 scaled0 = mCol0 * scalar;
+    const Vector3 scaled1 = mCol1 * scalar;
+    const Vector3 scaled2 = mCol2 * scalar;
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline Matrix3 & Matrix3::operator *=( vec_float4 scalar )
+{
+    *this = *this * scalar;  // implemented via the matrix * scalar operator followed by assignment
+    return *this;
+}
+
+inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat )
+{
+    return mat * scalar;  // scalar-on-the-left form forwards to the member operator
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
+{
+    const vec_float4 vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();
+    return Vector3(
+        spu_add( spu_add( spu_mul( mCol0.getX(), vx ), spu_mul( mCol1.getX(), vy ) ), spu_mul( mCol2.getX(), vz ) ),
+        spu_add( spu_add( spu_mul( mCol0.getY(), vx ), spu_mul( mCol1.getY(), vy ) ), spu_mul( mCol2.getY(), vz ) ),
+        spu_add( spu_add( spu_mul( mCol0.getZ(), vx ), spu_mul( mCol1.getZ(), vy ) ), spu_mul( mCol2.getZ(), vz ) ) );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    const Matrix3 & lhs = *this;   // column i of the product is this matrix applied to column i of mat
+    const Vector3 prod0 = lhs * mat.mCol0;
+    const Vector3 prod1 = lhs * mat.mCol1;
+    const Vector3 prod2 = lhs * mat.mCol2;
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    *this = *this * mat;  // mat is the right-hand factor of the product
+    return *this;
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    // Delegates to the Vector3 overload on matching columns.
+    const Vector3 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+inline const Matrix3 Matrix3::identity( )
+{
+    // Columns are Vector3's x, y and z axis vectors, in that order.
+    const Vector3 ex = Vector3::xAxis( );
+    const Vector3 ey = Vector3::yAxis( );
+    const Vector3 ez = Vector3::zAxis( );
+    return Matrix3( ex, ey, ez );
+}
+
+inline const Matrix3 Matrix3::rotationX( vec_float4 radians )
+{
+    vec_float4 sine, cosine;
+    sincosf4( radians, &sine, &cosine );
+    const vec_float4 zero = spu_splats(0.0f);
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( zero, cosine, sine ),
+        Vector3( zero, negatef4( sine ), cosine ) );
+}
+
+inline const Matrix3 Matrix3::rotationY( vec_float4 radians )
+{
+    vec_float4 sine, cosine;
+    sincosf4( radians, &sine, &cosine );
+    const vec_float4 zero = spu_splats(0.0f);
+    return Matrix3(
+        Vector3( cosine, zero, negatef4( sine ) ),
+        Vector3::yAxis( ),
+        Vector3( sine, zero, cosine ) );
+}
+
+inline const Matrix3 Matrix3::rotationZ( vec_float4 radians )
+{
+    vec_float4 sine, cosine;
+    sincosf4( radians, &sine, &cosine );
+    const vec_float4 zero = spu_splats(0.0f);
+    return Matrix3(
+        Vector3( cosine, sine, zero ),
+        Vector3( negatef4( sine ), cosine, zero ),
+        Vector3::zAxis( ) );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )  // x, y, z components of the argument are the angles about X, Y, Z
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );  // sine/cosine pair for each of the three angles
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    tmp0 = spu_mul( cZ, sY );  // cZ*sY and sZ*sY are shared by columns 1 and 2 below
+    tmp1 = spu_mul( sZ, sY );
+    return Matrix3(
+        Vector3( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) ),
+        Vector3( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) ),
+        Vector3( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( vec_float4 radians, const Vector3 & unitVec )  // angle/axis form; the name suggests unitVec should be normalized (not checked here)
+{
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c );
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    xy = spu_mul( x, y );  // axis component products reused by the off-diagonal terms
+    yz = spu_mul( y, z );
+    zx = spu_mul( z, x );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    return Matrix3(
+        Vector3( spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ) ),  // ( xx(1-c)+c, xy(1-c)+zs, zx(1-c)-ys )
+        Vector3( spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ) ),  // ( xy(1-c)-zs, yy(1-c)+c, yz(1-c)+xs )
+        Vector3( spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ) )  // ( zx(1-c)+ys, yz(1-c)-xs, zz(1-c)+c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
+{
+    return Matrix3( unitQuat );  // named-factory wrapper around the quaternion constructor
+}
+
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
+{
+    const vec_float4 zero = spu_splats(0.0f);   // off-diagonal filler
+    return Matrix3(
+        Vector3( scaleVec.getX(), zero, zero ),
+        Vector3( zero, scaleVec.getY(), zero ),
+        Vector3( zero, zero, scaleVec.getZ() ) );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
+{
+    // Column i of mat is multiplied by component i of scaleVec.
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
+{
+    // Every column of mat is multiplied element-wise by scaleVec.
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 )
+{
+    // Delegates to the Vector3 overload on matching columns, with the same mask.
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat )
+{
+    Aos::Matrix3 slot0, slot1, slot2, slot3;   // one AoS matrix per SoA slot
+    mat.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Matrix3 & mat, const char * name )
+{
+    printf("%s:\n", name);  // label line first, then the unlabeled print overload
+    print( mat );
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat )
+    : mCol0( mat.mCol0 ),   // column-by-column copy of the source matrix
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 ),
+      mCol3( mat.mCol3 )
+{
+}
+
+inline Matrix4::Matrix4( vec_float4 scalar )
+    : mCol0( scalar ),      // every column is built from the same scalar argument
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat )
+    : mCol0( mat.getCol0(), spu_splats(0.0f) ),   // first three columns are extended with w = 0 ...
+      mCol1( mat.getCol1(), spu_splats(0.0f) ),
+      mCol2( mat.getCol2(), spu_splats(0.0f) ),
+      mCol3( mat.getCol3(), spu_splats(1.0f) )    // ... and the fourth column with w = 1
+{
+}
+
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
+    : mCol0( _col0 ),       // store the four columns exactly as given
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
+    : mCol0( mat.getCol0(), spu_splats(0.0f) ),   // 3x3 columns are extended with w = 0 ...
+      mCol1( mat.getCol1(), spu_splats(0.0f) ),
+      mCol2( mat.getCol2(), spu_splats(0.0f) ),
+      mCol3( translateVec, spu_splats(1.0f) )     // ... and the translation column with w = 1
+{
+}
+
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    // Expand the quaternion to a 3x3 matrix first, then pad each column with a w component.
+    const Matrix3 rot( unitQuat );
+    mCol0 = Vector4( rot.getCol0(), spu_splats(0.0f) );
+    mCol1 = Vector4( rot.getCol1(), spu_splats(0.0f) );
+    mCol2 = Vector4( rot.getCol2(), spu_splats(0.0f) );
+    mCol3 = Vector4( translateVec, spu_splats(1.0f) );
+}
+
+inline Matrix4::Matrix4( const Aos::Matrix4 & mat )
+    : mCol0( mat.getCol0() ),   // each SoA column is constructed from the matching AoS column
+      mCol1( mat.getCol1() ),
+      mCol2( mat.getCol2() ),
+      mCol3( mat.getCol3() )
+{
+}
+
+inline Matrix4::Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 )
+    : mCol0( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() ),   // column k gathers column k of all four inputs
+      mCol1( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() ),
+      mCol2( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() ),
+      mCol3( mat0.getCol3(), mat1.getCol3(), mat2.getCol3(), mat3.getCol3() )
+{
+}
+
+inline void Matrix4::get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const
+{
+    Aos::Vector4 slot0, slot1, slot2, slot3;   // scratch: one AoS vector per output matrix, refilled for each column
+    mCol0.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol0( slot0 );
+    result1.setCol0( slot1 );
+    result2.setCol0( slot2 );
+    result3.setCol0( slot3 );
+    mCol1.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol1( slot0 );
+    result1.setCol1( slot1 );
+    result2.setCol1( slot2 );
+    result3.setCol1( slot3 );
+    mCol2.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol2( slot0 );
+    result1.setCol2( slot1 );
+    result2.setCol2( slot2 );
+    result3.setCol2( slot3 );
+    mCol3.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol3( slot0 );
+    result1.setCol3( slot1 );
+    result2.setCol3( slot2 );
+    result3.setCol3( slot3 );
+}
+
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
+{
+    mCol0 = _col0;  // replace column 0
+    return *this;   // returning the matrix lets calls be chained
+}
+
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
+{
+    mCol1 = _col1;  // replace column 1
+    return *this;   // returning the matrix lets calls be chained
+}
+
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
+{
+    mCol2 = _col2;  // replace column 2
+    return *this;   // returning the matrix lets calls be chained
+}
+
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
+{
+    mCol3 = _col3;  // replace column 3
+    return *this;   // returning the matrix lets calls be chained
+}
+
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
+{
+    (&mCol0)[ col ] = vec;   // columns are addressed as an array starting at mCol0; col is not range-checked
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );  // element i of vec is written into position `row` of column i
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, vec_float4 val )
+{
+    // Read-modify-write of one column: fetch a copy, patch one element, store it back.
+    Vector4 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+
+inline vec_float4 Matrix4::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );  // select the column first, then the element within it
+}
+
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return mCol0;  // returned by value: a copy of column 0
+}
+
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return mCol1;  // returned by value: a copy of column 1
+}
+
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return mCol2;  // returned by value: a copy of column 2
+}
+
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return mCol3;  // returned by value: a copy of column 3
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    return (&mCol0)[ col ];   // columns are addressed as an array starting at mCol0; col is not range-checked
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );  // element `row` taken from each column in turn
+}
+
+inline Vector4 & Matrix4::operator []( int col )
+{
+    return (&mCol0)[ col ];   // mutable reference to the selected column; col is not range-checked
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    return (&mCol0)[ col ];   // const overload returns the selected column by value; col is not range-checked
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;  // plain column-by-column copy
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+    return *this;
+}
+
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    const Vector4 c0 = mat.getCol0(), c1 = mat.getCol1(), c2 = mat.getCol2(), c3 = mat.getCol3();
+    const Vector4 row0( c0.getX(), c1.getX(), c2.getX(), c3.getX() );   // the rows of the input ...
+    const Vector4 row1( c0.getY(), c1.getY(), c2.getY(), c3.getY() );
+    const Vector4 row2( c0.getZ(), c1.getZ(), c2.getZ(), c3.getZ() );
+    const Vector4 row3( c0.getW(), c1.getW(), c2.getW(), c3.getW() );
+    return Matrix4( row0, row1, row2, row3 );                            // ... become the columns of the result
+}
+
+inline const Matrix4 inverse( const Matrix4 & mat )  // general 4x4 inverse: unscaled result columns res0..res3, each multiplied by detInv at the end
+{
+    Vector4 res0, res1, res2, res3;
+    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat.getCol0().getX();  // mA..mD = column 0, mE..mH = column 1, mI..mL = column 2, mM..mP = column 3
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );  // first batch of six product differences, built from the y/z/w elements only
+    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
+    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
+    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
+    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
+    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
+    res0.setX( spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) ) );  // res0 is complete after these four lines
+    res0.setY( spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) ) );
+    res0.setZ( spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) ) );
+    res0.setW( spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) ) );
+    detInv = recipf4( spu_add( spu_add( spu_add( spu_mul( mA, res0.getX() ), spu_mul( mE, res0.getY() ) ), spu_mul( mI, res0.getZ() ) ), spu_mul( mM, res0.getW() ) ) );  // reciprocal of mA*res0.x + mE*res0.y + mI*res0.z + mM*res0.w; no check for a zero value
+    res1.setX( spu_mul( mI, tmp1 ) );  // partial terms for res1..res3 that use the first tmp batch; finished further down
+    res1.setY( spu_mul( mM, tmp0 ) );
+    res1.setZ( spu_mul( mA, tmp1 ) );
+    res1.setW( spu_mul( mE, tmp0 ) );
+    res3.setX( spu_mul( mI, tmp3 ) );
+    res3.setY( spu_mul( mM, tmp2 ) );
+    res3.setZ( spu_mul( mA, tmp3 ) );
+    res3.setW( spu_mul( mE, tmp2 ) );
+    res2.setX( spu_mul( mI, tmp5 ) );
+    res2.setY( spu_mul( mM, tmp4 ) );
+    res2.setZ( spu_mul( mA, tmp5 ) );
+    res2.setW( spu_mul( mE, tmp4 ) );
+    tmp0 = spu_sub( spu_mul( mI, mB ), spu_mul( mA, mJ ) );  // tmp0..tmp5 are reused for a second batch of product differences
+    tmp1 = spu_sub( spu_mul( mM, mF ), spu_mul( mE, mN ) );
+    tmp2 = spu_sub( spu_mul( mI, mD ), spu_mul( mA, mL ) );
+    tmp3 = spu_sub( spu_mul( mM, mH ), spu_mul( mE, mP ) );
+    tmp4 = spu_sub( spu_mul( mI, mC ), spu_mul( mA, mK ) );
+    tmp5 = spu_sub( spu_mul( mM, mG ), spu_mul( mE, mO ) );
+    res2.setX( spu_add( spu_sub( spu_mul( mL, tmp1 ), spu_mul( mJ, tmp3 ) ), res2.getX() ) );  // fold the partial terms stored above into the final values
+    res2.setY( spu_add( spu_sub( spu_mul( mP, tmp0 ), spu_mul( mN, tmp2 ) ), res2.getY() ) );
+    res2.setZ( spu_sub( spu_sub( spu_mul( mB, tmp3 ), spu_mul( mD, tmp1 ) ), res2.getZ() ) );
+    res2.setW( spu_sub( spu_sub( spu_mul( mF, tmp2 ), spu_mul( mH, tmp0 ) ), res2.getW() ) );
+    res3.setX( spu_add( spu_sub( spu_mul( mJ, tmp5 ), spu_mul( mK, tmp1 ) ), res3.getX() ) );
+    res3.setY( spu_add( spu_sub( spu_mul( mN, tmp4 ), spu_mul( mO, tmp0 ) ), res3.getY() ) );
+    res3.setZ( spu_sub( spu_sub( spu_mul( mC, tmp1 ), spu_mul( mB, tmp5 ) ), res3.getZ() ) );
+    res3.setW( spu_sub( spu_sub( spu_mul( mG, tmp0 ), spu_mul( mF, tmp4 ) ), res3.getW() ) );
+    res1.setX( spu_sub( spu_sub( spu_mul( mK, tmp3 ), spu_mul( mL, tmp5 ) ), res1.getX() ) );
+    res1.setY( spu_sub( spu_sub( spu_mul( mO, tmp2 ), spu_mul( mP, tmp4 ) ), res1.getY() ) );
+    res1.setZ( spu_add( spu_sub( spu_mul( mD, tmp5 ), spu_mul( mC, tmp3 ) ), res1.getZ() ) );
+    res1.setW( spu_add( spu_sub( spu_mul( mH, tmp4 ), spu_mul( mG, tmp2 ) ), res1.getW() ) );
+    return Matrix4(
+        ( res0 * detInv ),
+        ( res1 * detInv ),
+        ( res2 * detInv ),
+        ( res3 * detInv )
+    );
+}
+
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    Transform3 affineMat;
+    affineMat.setCol0( mat.getCol0().getXYZ( ) );  // only the xyz part of each column is used; the w components of mat are ignored
+    affineMat.setCol1( mat.getCol1().getXYZ( ) );
+    affineMat.setCol2( mat.getCol2().getXYZ( ) );
+    affineMat.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( inverse( affineMat ) );  // invert as a Transform3, then widen back to 4x4
+}
+
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    Transform3 affineMat;
+    affineMat.setCol0( mat.getCol0().getXYZ( ) );  // only the xyz part of each column is used; the w components of mat are ignored
+    affineMat.setCol1( mat.getCol1().getXYZ( ) );
+    affineMat.setCol2( mat.getCol2().getXYZ( ) );
+    affineMat.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( orthoInverse( affineMat ) );  // delegate to the Transform3 overload, then widen back to 4x4
+}
+
+inline vec_float4 determinant( const Matrix4 & mat )  // expansion along the x elements of the four columns (mA, mE, mI, mM)
+{
+    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat.getCol0().getX();  // mA..mD = column 0, mE..mH = column 1, mI..mL = column 2, mM..mP = column 3
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );  // six product differences built from the y/z/w elements only
+    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
+    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
+    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
+    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
+    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
+    dx = spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) );  // dx..dw are the factors paired with mA, mE, mI, mM in the final sum
+    dy = spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) );
+    dz = spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) );
+    dw = spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) );
+    return spu_add( spu_add( spu_add( spu_mul( mA, dx ), spu_mul( mE, dy ) ), spu_mul( mI, dz ) ), spu_mul( mM, dw ) );
+}
+
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    // Column-wise sum.
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    // Column-wise difference.
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    *this = *this + mat;  // implemented via binary operator + followed by assignment
+    return *this;
+}
+
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    *this = *this - mat;  // implemented via binary operator - followed by assignment
+    return *this;
+}
+
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    // Negate each column.
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    // Delegates to the Vector4 overload, one column at a time.
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+inline const Matrix4 Matrix4::operator *( vec_float4 scalar ) const
+{
+    // Scale each column by the same scalar.
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( vec_float4 scalar )
+{
+    *this = *this * scalar;  // implemented via the matrix * scalar operator followed by assignment
+    return *this;
+}
+
+inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat )
+{
+    return mat * scalar;  // scalar-on-the-left form forwards to the member operator
+}
+
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
+{
+    const vec_float4 vx = vec.getX(), vy = vec.getY(), vz = vec.getZ(), vw = vec.getW();
+    return Vector4(
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), vx ), spu_mul( mCol1.getX(), vy ) ), spu_mul( mCol2.getX(), vz ) ), spu_mul( mCol3.getX(), vw ) ),
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), vx ), spu_mul( mCol1.getY(), vy ) ), spu_mul( mCol2.getY(), vz ) ), spu_mul( mCol3.getY(), vw ) ),
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), vx ), spu_mul( mCol1.getZ(), vy ) ), spu_mul( mCol2.getZ(), vz ) ), spu_mul( mCol3.getZ(), vw ) ),
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getW(), vx ), spu_mul( mCol1.getW(), vy ) ), spu_mul( mCol2.getW(), vz ) ), spu_mul( mCol3.getW(), vw ) ) );
+}
+
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
+{
+    const vec_float4 vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();   // column 3 does not contribute for a Vector3 argument
+    return Vector4(
+        spu_add( spu_add( spu_mul( mCol0.getX(), vx ), spu_mul( mCol1.getX(), vy ) ), spu_mul( mCol2.getX(), vz ) ),
+        spu_add( spu_add( spu_mul( mCol0.getY(), vx ), spu_mul( mCol1.getY(), vy ) ), spu_mul( mCol2.getY(), vz ) ),
+        spu_add( spu_add( spu_mul( mCol0.getZ(), vx ), spu_mul( mCol1.getZ(), vy ) ), spu_mul( mCol2.getZ(), vz ) ),
+        spu_add( spu_add( spu_mul( mCol0.getW(), vx ), spu_mul( mCol1.getW(), vy ) ), spu_mul( mCol2.getW(), vz ) ) );
+}
+
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
+{
+    const vec_float4 px = pnt.getX(), py = pnt.getY(), pz = pnt.getZ();   // column 3 is added unscaled for a Point3 argument
+    return Vector4(
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), px ), spu_mul( mCol1.getX(), py ) ), spu_mul( mCol2.getX(), pz ) ), mCol3.getX() ),
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), px ), spu_mul( mCol1.getY(), py ) ), spu_mul( mCol2.getY(), pz ) ), mCol3.getY() ),
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), px ), spu_mul( mCol1.getZ(), py ) ), spu_mul( mCol2.getZ(), pz ) ), mCol3.getZ() ),
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getW(), px ), spu_mul( mCol1.getW(), py ) ), spu_mul( mCol2.getW(), pz ) ), mCol3.getW() ) );
+}
+
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Matrix4 & lhs = *this;   // column i of the product is this matrix applied to column i of mat
+    const Vector4 prod0 = lhs * mat.mCol0;
+    const Vector4 prod1 = lhs * mat.mCol1;
+    const Vector4 prod2 = lhs * mat.mCol2;
+    const Vector4 prod3 = lhs * mat.mCol3;
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    *this = *this * mat;  // mat is the right-hand factor of the product
+    return *this;
+}
+
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Matrix4 & lhs = *this;   // first three columns go through the Vector3 overload, the last through the Point3 overload
+    const Vector4 prod0 = lhs * tfrm.getCol0();
+    const Vector4 prod1 = lhs * tfrm.getCol1();
+    const Vector4 prod2 = lhs * tfrm.getCol2();
+    const Vector4 prod3 = lhs * Point3( tfrm.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    *this = *this * tfrm;  // tfrm is the right-hand factor of the product
+    return *this;
+}
+
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    // Delegates to the Vector4 overload on matching columns.
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+inline const Matrix4 Matrix4::identity( )
+{
+    // Columns are Vector4's x, y, z and w axis vectors, in that order.
+    const Vector4 ex = Vector4::xAxis( );
+    const Vector4 ey = Vector4::yAxis( );
+    const Vector4 ez = Vector4::zAxis( );
+    const Vector4 ew = Vector4::wAxis( );
+    return Matrix4( ex, ey, ez, ew );
+}
+
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    mCol0.setXYZ( mat3.getCol0() );  // only setXYZ is called on columns 0..2; column 3 is not touched
+    mCol1.setXYZ( mat3.getCol1() );
+    mCol2.setXYZ( mat3.getCol2() );
+    return *this;
+}
+
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    // Matrix3 built from getXYZ() of columns 0..2; column 3 is not read.
+    const Vector3 col0 = mCol0.getXYZ( );
+    const Vector3 col1 = mCol1.getXYZ( );
+    const Vector3 col2 = mCol2.getXYZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
+{
+    this->mCol3.setXYZ( translateVec );   // only column 3 is written, through setXYZ
+    return *this;
+}
+
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    return this->mCol3.getXYZ( );   // getXYZ() of the fourth column
+}
+
+inline const Matrix4 Matrix4::rotationX( vec_float4 radians )
+{
+    // Columns: xAxis, ( 0, c, s, 0 ), ( 0, -s, c, 0 ), wAxis,
+    // where s and c are the outputs of sincosf4( radians ).
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector4 yCol( zero, cosA, sinA, zero );
+    const Vector4 zCol( zero, negatef4( sinA ), cosA, zero );
+    return Matrix4( Vector4::xAxis( ), yCol, zCol, Vector4::wAxis( ) );
+}
+
+inline const Matrix4 Matrix4::rotationY( vec_float4 radians )
+{
+    // Columns: ( c, 0, -s, 0 ), yAxis, ( s, 0, c, 0 ), wAxis,
+    // where s and c are the outputs of sincosf4( radians ).
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector4 xCol( cosA, zero, negatef4( sinA ), zero );
+    const Vector4 zCol( sinA, zero, cosA, zero );
+    return Matrix4( xCol, Vector4::yAxis( ), zCol, Vector4::wAxis( ) );
+}
+
+inline const Matrix4 Matrix4::rotationZ( vec_float4 radians )
+{
+    // Columns: ( c, s, 0, 0 ), ( -s, c, 0, 0 ), zAxis, wAxis,
+    // where s and c are the outputs of sincosf4( radians ).
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector4 xCol( cosA, sinA, zero, zero );
+    const Vector4 yCol( negatef4( sinA ), cosA, zero, zero );
+    return Matrix4( xCol, yCol, Vector4::zAxis( ), Vector4::wAxis( ) );
+}
+
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
+{   // Rotation from three angles; the columns below expand to Rz(z) * Ry(y) * Rx(x) with wAxis as the last column.
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );   // sN / cN receive the sincosf4 outputs for the angle about axis N
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    tmp0 = spu_mul( cZ, sY );   // cZ*sY, shared by the x elements of columns 1 and 2
+    tmp1 = spu_mul( sZ, sY );   // sZ*sY, shared by the y elements of columns 1 and 2
+    return Matrix4(
+        Vector4( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ), spu_splats(0.0f) ),
+        Vector4( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ), spu_splats(0.0f) ),
+        Vector4( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ), spu_splats(0.0f) ),
+        Vector4::wAxis( )
+    );
+}
+
+inline const Matrix4 Matrix4::rotation( vec_float4 radians, const Vector3 & unitVec )
+{   // Axis-angle rotation about unitVec; the vector is used as given (no normalization is done here).
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c );   // s and c receive the sincosf4 outputs for radians
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    xy = spu_mul( x, y );   // pairwise axis products reused by the off-diagonal terms
+    yz = spu_mul( y, z );
+    zx = spu_mul( z, x );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );   // 1 - c
+    return Matrix4(
+        Vector4( spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_splats(0.0f) ),
+        Vector4( spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_splats(0.0f) ),
+        Vector4( spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ), spu_splats(0.0f) ),
+        Vector4::wAxis( )   // last column
+    );
+}
+
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
+{   // Delegates to Transform3::rotation( unitQuat ) and converts the result to a Matrix4.
+    return Matrix4( Transform3::rotation( unitQuat ) );
+}
+
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    // scaleVec's x, y, z on the diagonal, zeros elsewhere, wAxis as the last column.
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector4 col0( scaleVec.getX(), zero, zero, zero );
+    const Vector4 col1( zero, scaleVec.getY(), zero, zero );
+    const Vector4 col2( zero, zero, scaleVec.getZ(), zero );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    // Columns 0..2 are scaled by scaleVec's x, y, z respectively; column 3 is copied.
+    const Vector4 col0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 col1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 col2 = mat.getCol2() * scaleVec.getZ( );
+    const Vector4 col3 = mat.getCol3();
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    // Every column, including column 3, is multiplied element-wise by
+    // Vector4( scaleVec, 1.0f ).
+    const Vector4 rowScale( scaleVec, spu_splats(1.0f) );
+    const Vector4 col0 = mulPerElem( mat.getCol0(), rowScale );
+    const Vector4 col1 = mulPerElem( mat.getCol1(), rowScale );
+    const Vector4 col2 = mulPerElem( mat.getCol2(), rowScale );
+    const Vector4 col3 = mulPerElem( mat.getCol3(), rowScale );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    // Axis vectors in columns 0..2; Vector4( translateVec, 1.0f ) in column 3.
+    const Vector4 ex = Vector4::xAxis( );
+    const Vector4 ey = Vector4::yAxis( );
+    const Vector4 ez = Vector4::zAxis( );
+    const Vector4 origin( translateVec, spu_splats(1.0f) );
+    return Matrix4( ex, ey, ez, origin );
+}
+
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    // Build the eye frame (z points from the target toward the eye, y is
+    // re-derived from z and x), then return the orthoInverse of that frame.
+    const Vector3 up = normalize( upVec );
+    const Vector3 back = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 right = normalize( cross( up, back ) );
+    const Vector3 trueUp = cross( back, right );
+    const Matrix4 eyeFrame = Matrix4( Vector4( right ), Vector4( trueUp ), Vector4( back ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+inline const Matrix4 Matrix4::perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{   // Perspective projection built from a field-of-view angle, an aspect ratio and near/far distances.
+    vec_float4 f, rangeInv;
+    f = tanf4( spu_sub( spu_splats( _VECTORMATH_PI_OVER_2 ), spu_mul( spu_splats(0.5f), fovyRadians ) ) );   // tan( PI_OVER_2 - fovy/2 )
+    rangeInv = recipf4( spu_sub( zNear, zFar ) );   // recipf4 of ( zNear - zFar ); zNear == zFar is not guarded against
+    return Matrix4(
+        Vector4( divf4( f, aspect ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
+        Vector4( spu_splats(0.0f), f, spu_splats(0.0f), spu_splats(0.0f) ),
+        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_add( zNear, zFar ), rangeInv ), spu_splats(-1.0f) ),
+        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( spu_mul( zNear, zFar ), rangeInv ), spu_splats(2.0f) ), spu_splats(0.0f) )
+    );   // columns: ( f/aspect, 0, 0, 0 ), ( 0, f, 0, 0 ), ( 0, 0, (n+f)*rangeInv, -1 ), ( 0, 0, 2*n*f*rangeInv, 0 )
+}
+
+inline const Matrix4 Matrix4::frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   // Perspective projection for the view volume given by left/right, bottom/top and zNear/zFar.
+    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
+    sum_rl = spu_add( right, left );
+    sum_tb = spu_add( top, bottom );
+    sum_nf = spu_add( zNear, zFar );
+    inv_rl = recipf4( spu_sub( right, left ) );   // recipf4 of each extent; zero extents are not guarded against
+    inv_tb = recipf4( spu_sub( top, bottom ) );
+    inv_nf = recipf4( spu_sub( zNear, zFar ) );   // note the order: near minus far
+    n2 = spu_add( zNear, zNear );   // 2 * zNear
+    return Matrix4(
+        Vector4( spu_mul( n2, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
+        Vector4( spu_splats(0.0f), spu_mul( n2, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) ),
+        Vector4( spu_mul( sum_rl, inv_rl ), spu_mul( sum_tb, inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(-1.0f) ),
+        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( n2, inv_nf ), zFar ), spu_splats(0.0f) )
+    );
+}
+
+inline const Matrix4 Matrix4::orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   // Orthographic projection for the box given by left/right, bottom/top and zNear/zFar.
+    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
+    sum_rl = spu_add( right, left );
+    sum_tb = spu_add( top, bottom );
+    sum_nf = spu_add( zNear, zFar );
+    inv_rl = recipf4( spu_sub( right, left ) );   // recipf4 of each extent; zero extents are not guarded against
+    inv_tb = recipf4( spu_sub( top, bottom ) );
+    inv_nf = recipf4( spu_sub( zNear, zFar ) );   // note the order: near minus far
+    return Matrix4(
+        Vector4( spu_add( inv_rl, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
+        Vector4( spu_splats(0.0f), spu_add( inv_tb, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) ),
+        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_add( inv_nf, inv_nf ), spu_splats(0.0f) ),
+        Vector4( spu_mul( negatef4( sum_rl ), inv_rl ), spu_mul( negatef4( sum_tb ), inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(1.0f) )
+    );   // the diagonal uses inv + inv ( twice each reciprocal ); the last column holds the offsets
+}
+
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 )
+{
+    // Column-wise select; the same mask select1 is forwarded for every column.
+    const Vector4 col0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 col1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 col2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 col3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix4 & mat )
+{
+    Aos::Matrix4 slot0, slot1, slot2, slot3;   // one AoS matrix per slot of the SoA matrix
+    mat.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Matrix4 & mat, const char * name )
+{   // Prints "<name>:" on its own line, then the per-slot dump from print( mat ).
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+inline Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 ),
+      mCol1( tfrm.mCol1 ),
+      mCol2( tfrm.mCol2 ),
+      mCol3( tfrm.mCol3 )
+{   // column-by-column copy; nothing further to do
+}
+
+inline Transform3::Transform3( vec_float4 scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{   // each column is built from the same scalar through Vector3's vec_float4 constructor
+}
+
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{   // columns are stored exactly as given
+}
+
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    // Columns 0..2 come from tfrm, column 3 from translateVec; the setters return *this, so they chain.
+    this->setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    // The quaternion is converted with Matrix3( unitQuat ) for columns 0..2; column 3 is translateVec.
+    this->setUpper3x3( Matrix3( unitQuat ) ).setTranslation( translateVec );
+}
+
+inline Transform3::Transform3( const Aos::Transform3 & tfrm )
+    : mCol0( tfrm.getCol0() ),
+      mCol1( tfrm.getCol1() ),
+      mCol2( tfrm.getCol2() ),
+      mCol3( tfrm.getCol3() )
+{   // each SoA column is built from the matching column of the single AoS transform
+}
+
+inline Transform3::Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 )
+    : mCol0( tfrm0.getCol0(), tfrm1.getCol0(), tfrm2.getCol0(), tfrm3.getCol0() ),
+      mCol1( tfrm0.getCol1(), tfrm1.getCol1(), tfrm2.getCol1(), tfrm3.getCol1() ),
+      mCol2( tfrm0.getCol2(), tfrm1.getCol2(), tfrm2.getCol2(), tfrm3.getCol2() ),
+      mCol3( tfrm0.getCol3(), tfrm1.getCol3(), tfrm2.getCol3(), tfrm3.getCol3() )
+{   // column k gathers column k of the four AoS transforms, in tfrm0..tfrm3 order
+}
+
+inline void Transform3::get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const
+{
+    Aos::Vector3 slot0, slot1, slot2, slot3;   // scratch, reused for each column: one AoS vector per output
+    this->mCol0.get4Aos( slot0, slot1, slot2, slot3 );   // column 0
+    result0.setCol0( slot0 );
+    result1.setCol0( slot1 );
+    result2.setCol0( slot2 );
+    result3.setCol0( slot3 );
+    this->mCol1.get4Aos( slot0, slot1, slot2, slot3 );   // column 1
+    result0.setCol1( slot0 );
+    result1.setCol1( slot1 );
+    result2.setCol1( slot2 );
+    result3.setCol1( slot3 );
+    this->mCol2.get4Aos( slot0, slot1, slot2, slot3 );   // column 2
+    result0.setCol2( slot0 );
+    result1.setCol2( slot1 );
+    result2.setCol2( slot2 );
+    result3.setCol2( slot3 );
+    this->mCol3.get4Aos( slot0, slot1, slot2, slot3 );   // column 3
+    result0.setCol3( slot0 );
+    result1.setCol3( slot1 );
+    result2.setCol3( slot2 );
+    result3.setCol3( slot3 );
+}
+
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
+{
+    this->mCol0 = _col0;   // replace column 0; returns *this so calls can chain
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
+{
+    this->mCol1 = _col1;   // replace column 1; returns *this so calls can chain
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
+{
+    this->mCol2 = _col2;   // replace column 2; returns *this so calls can chain
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
+{
+    this->mCol3 = _col3;   // replace column 3; returns *this so calls can chain
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{
+    ( &mCol0 )[ col ] = vec;   // indexes from mCol0; col is not range-checked
+    return *this;
+}
+
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
+{
+    this->mCol0.setElem( row, vec.getElem( 0 ) );   // element k of vec goes to
+    this->mCol1.setElem( row, vec.getElem( 1 ) );   // position `row` of column k
+    this->mCol2.setElem( row, vec.getElem( 2 ) );
+    this->mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Transform3 & Transform3::setElem( int col, int row, vec_float4 val )
+{
+    // Read-modify-write of one column: copy it out, patch one element,
+    // store it back. setCol returns *this, which is what gets returned.
+    Vector3 column = this->getCol( col );
+    column.setElem( row, val );
+    return this->setCol( col, column );
+}
+
+inline vec_float4 Transform3::getElem( int col, int row ) const
+{
+    return ( &mCol0 )[ col ].getElem( row );   // same column lookup as getCol( col ); indices are not range-checked
+}
+
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;   // column 0, returned by value
+}
+
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;   // column 1, returned by value
+}
+
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;   // column 2, returned by value
+}
+
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;   // column 3, returned by value
+}
+
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    return ( &mCol0 )[ col ];   // indexes from mCol0; col is not range-checked
+}
+
+inline const Vector4 Transform3::getRow( int row ) const
+{   // Gathers element `row` of columns 0..3 into one Vector4 ( x from mCol0 ... w from mCol3 ).
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+inline Vector3 & Transform3::operator []( int col )
+{
+    return ( &mCol0 )[ col ];   // writable reference to column `col`; col is not range-checked
+}
+
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    return ( &mCol0 )[ col ];   // copy of column `col`; col is not range-checked
+}
+
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;   // column-wise copy of all four columns
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+    return *this;
+}
+
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    // General inverse: the 3x3 part is inverted from cross products of its
+    // columns scaled by the reciprocal determinant, and the new translation
+    // is minus the inverted 3x3 applied to the old translation.
+    const Vector3 c0 = tfrm.getCol0(), c1 = tfrm.getCol1(), c2 = tfrm.getCol2(), t = tfrm.getCol3();
+    const Vector3 adj0 = cross( c1, c2 );
+    const Vector3 adj1 = cross( c2, c0 );
+    const Vector3 adj2 = cross( c0, c1 );
+    const vec_float4 detinv = recipf4( dot( c2, adj2 ) );   // a zero determinant is not guarded against
+    const Vector3 inv0( spu_mul( adj0.getX(), detinv ), spu_mul( adj1.getX(), detinv ), spu_mul( adj2.getX(), detinv ) );
+    const Vector3 inv1( spu_mul( adj0.getY(), detinv ), spu_mul( adj1.getY(), detinv ), spu_mul( adj2.getY(), detinv ) );
+    const Vector3 inv2( spu_mul( adj0.getZ(), detinv ), spu_mul( adj1.getZ(), detinv ), spu_mul( adj2.getZ(), detinv ) );
+    return Transform3(
+        inv0, inv1, inv2,
+        Vector3( ( -( ( inv0 * t.getX() ) + ( ( inv1 * t.getY() ) + ( inv2 * t.getZ() ) ) ) ) )
+    );
+}
+
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    // Transposes the 3x3 part (no determinant is computed) and sets the new
+    // translation to minus the transposed 3x3 applied to the old translation.
+    const Vector3 c0 = tfrm.getCol0(), c1 = tfrm.getCol1(), c2 = tfrm.getCol2(), t = tfrm.getCol3();
+    const Vector3 inv0( c0.getX(), c1.getX(), c2.getX() );
+    const Vector3 inv1( c0.getY(), c1.getY(), c2.getY() );
+    const Vector3 inv2( c0.getZ(), c1.getZ(), c2.getZ() );
+    return Transform3(
+        inv0, inv1, inv2,
+        Vector3( ( -( ( inv0 * t.getX() ) + ( ( inv1 * t.getY() ) + ( inv2 * t.getZ() ) ) ) ) )
+    );
+}
+
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    // absPerElem applied to each of the four columns, translation included.
+    const Vector3 col0 = absPerElem( tfrm.getCol0() );
+    const Vector3 col1 = absPerElem( tfrm.getCol1() );
+    const Vector3 col2 = absPerElem( tfrm.getCol2() );
+    const Vector3 col3 = absPerElem( tfrm.getCol3() );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
+{
+    const vec_float4 vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();   // mCol3 is not used: no translation is added for a Vector3
+    return Vector3(
+        spu_add( spu_add( spu_mul( mCol0.getX(), vx ), spu_mul( mCol1.getX(), vy ) ), spu_mul( mCol2.getX(), vz ) ),
+        spu_add( spu_add( spu_mul( mCol0.getY(), vx ), spu_mul( mCol1.getY(), vy ) ), spu_mul( mCol2.getY(), vz ) ),
+        spu_add( spu_add( spu_mul( mCol0.getZ(), vx ), spu_mul( mCol1.getZ(), vy ) ), spu_mul( mCol2.getZ(), vz ) ) );
+}
+
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const
+{
+    const vec_float4 px = pnt.getX(), py = pnt.getY(), pz = pnt.getZ();   // same as the Vector3 case, plus mCol3 added last
+    return Point3(
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), px ), spu_mul( mCol1.getX(), py ) ), spu_mul( mCol2.getX(), pz ) ), mCol3.getX() ),
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), px ), spu_mul( mCol1.getY(), py ) ), spu_mul( mCol2.getY(), pz ) ), mCol3.getY() ),
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), px ), spu_mul( mCol1.getZ(), py ) ), spu_mul( mCol2.getZ(), pz ) ), mCol3.getZ() ) );
+}
+
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    // Columns 0..2 of tfrm go through the Vector3 overload (no translation);
+    // its translation goes through the Point3 overload, which adds mCol3.
+    const Transform3 & lhs = *this;
+    const Point3 origin = Point3( tfrm.mCol3 );
+    const Vector3 col3 = Vector3( lhs * origin );
+    return Transform3( lhs * tfrm.mCol0, lhs * tfrm.mCol1, lhs * tfrm.mCol2, col3 );
+}
+
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    // operator= returns *this, so the assignment expression can be returned directly.
+    return ( *this = ( *this ) * tfrm );
+}
+
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    // Element-wise product, formed one column pair at a time.
+    const Vector3 col0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 col1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 col2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 col3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+inline const Transform3 Transform3::identity( )
+{
+    // Axis vectors in columns 0..2 and Vector3( 0.0f ) as the translation column.
+    const Vector3 ex = Vector3::xAxis( );
+    const Vector3 ey = Vector3::yAxis( );
+    const Vector3 ez = Vector3::zAxis( );
+    const Vector3 origin = Vector3( spu_splats(0.0f) );
+    return Transform3( ex, ey, ez, origin );
+}
+
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    this->mCol0 = tfrm.getCol0();   // columns 0..2 are replaced;
+    this->mCol1 = tfrm.getCol1();   // mCol3 (translation) is left as it was
+    this->mCol2 = tfrm.getCol2();
+    return *this;
+}
+
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    return Matrix3( this->mCol0, this->mCol1, this->mCol2 );   // columns 0..2; the translation column is not included
+}
+
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
+{
+    this->mCol3 = translateVec;   // the translation is stored as column 3
+    return *this;
+}
+
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->mCol3;   // column 3, returned by value
+}
+
+inline const Transform3 Transform3::rotationX( vec_float4 radians )
+{
+    // Columns: xAxis, ( 0, c, s ), ( 0, -s, c ), and Vector3( 0.0f ) as translation,
+    // where s and c are the outputs of sincosf4( radians ).
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 yCol( zero, cosA, sinA );
+    const Vector3 zCol( zero, negatef4( sinA ), cosA );
+    return Transform3( Vector3::xAxis( ), yCol, zCol, Vector3( zero ) );
+}
+
+inline const Transform3 Transform3::rotationY( vec_float4 radians )
+{
+    // Columns: ( c, 0, -s ), yAxis, ( s, 0, c ), and Vector3( 0.0f ) as translation,
+    // where s and c are the outputs of sincosf4( radians ).
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 xCol( cosA, zero, negatef4( sinA ) );
+    const Vector3 zCol( sinA, zero, cosA );
+    return Transform3( xCol, Vector3::yAxis( ), zCol, Vector3( zero ) );
+}
+
+inline const Transform3 Transform3::rotationZ( vec_float4 radians )
+{
+    // Columns: ( c, s, 0 ), ( -s, c, 0 ), zAxis, and Vector3( 0.0f ) as translation,
+    // where s and c are the outputs of sincosf4( radians ).
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 xCol( cosA, sinA, zero );
+    const Vector3 yCol( negatef4( sinA ), cosA, zero );
+    return Transform3( xCol, yCol, Vector3::zAxis( ), Vector3( zero ) );
+}
+
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
+{   // Rotation from three angles; the columns below expand to Rz(z) * Ry(y) * Rx(x), with Vector3( 0.0f ) as translation.
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );   // sN / cN receive the sincosf4 outputs for the angle about axis N
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    tmp0 = spu_mul( cZ, sY );   // cZ*sY, shared by the x elements of columns 1 and 2
+    tmp1 = spu_mul( sZ, sY );   // sZ*sY, shared by the y elements of columns 1 and 2
+    return Transform3(
+        Vector3( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) ),
+        Vector3( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) ),
+        Vector3( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) ),
+        Vector3( spu_splats(0.0f) )
+    );
+}
+
+inline const Transform3 Transform3::rotation( vec_float4 radians, const Vector3 & unitVec )
+{   // 3x3 part from Matrix3::rotation( radians, unitVec ); translation column is Vector3( 0.0f ).
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( spu_splats(0.0f) ) );
+}
+
+inline const Transform3 Transform3::rotation( const Quat & unitQuat )
+{   // 3x3 part from Matrix3( unitQuat ); translation column is Vector3( 0.0f ).
+    return Transform3( Matrix3( unitQuat ), Vector3( spu_splats(0.0f) ) );
+}
+
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{
+    // scaleVec's x, y, z on the 3x3 diagonal; translation column is Vector3( 0.0f ).
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 col0( scaleVec.getX(), zero, zero );
+    const Vector3 col1( zero, scaleVec.getY(), zero );
+    const Vector3 col2( zero, zero, scaleVec.getZ() );
+    return Transform3( col0, col1, col2, Vector3( zero ) );
+}
+
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{
+    // Columns 0..2 are scaled by scaleVec's x, y, z respectively; the translation is copied.
+    const Vector3 col0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 col1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 col2 = tfrm.getCol2() * scaleVec.getZ( );
+    const Vector3 col3 = tfrm.getCol3();
+    return Transform3( col0, col1, col2, col3 );
+}
+
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{
+    // Every column, translation included, is multiplied element-wise by scaleVec.
+    const Vector3 col0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 col1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 col2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 col3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+inline const Transform3 Transform3::translation( const Vector3 & translateVec )
+{
+    // Axis vectors in columns 0..2; translateVec is stored unchanged as
+    // the translation column.
+    const Vector3 ex = Vector3::xAxis( );
+    const Vector3 ey = Vector3::yAxis( );
+    const Vector3 ez = Vector3::zAxis( );
+    return Transform3( ex, ey, ez, translateVec );
+}
+
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 )
+{
+    // Column-wise select; the same mask select1 is forwarded for every column.
+    const Vector3 col0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 col1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 col2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 col3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Transform3 & tfrm )
+{
+    Aos::Transform3 slot0, slot1, slot2, slot3;   // one AoS transform per slot of the SoA transform
+    tfrm.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Transform3 & tfrm, const char * name )
+{   // Prints "<name>:" on its own line, then the per-slot dump from print( tfrm ).
+    printf("%s:\n", name);
+    print( tfrm );
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm )
+{   // Builds a quaternion from a 3x3 matrix without branches: compare masks pick the case for each element independently.
+    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
+    vec_uint4 largestXorY, largestYorZ, largestZorX;
+
+    xx = tfrm.getCol0().getX();   // naming: first letter is the row (element), second is the column
+    yx = tfrm.getCol0().getY();
+    zx = tfrm.getCol0().getZ();
+    xy = tfrm.getCol1().getX();
+    yy = tfrm.getCol1().getY();
+    zy = tfrm.getCol1().getZ();
+    xz = tfrm.getCol2().getX();
+    yz = tfrm.getCol2().getY();
+    zz = tfrm.getCol2().getZ();
+
+    trace = spu_add( spu_add( xx, yy ), zz );   // sum of the diagonal
+
+    negTrace = spu_cmpgt( spu_splats(0.0f), trace );   // mask: 0 > trace
+    ZgtX = spu_cmpgt( zz, xx );
+    ZgtY = spu_cmpgt( zz, yy );
+    YgtX = spu_cmpgt( yy, xx );
+    largestXorY = spu_and( negTrace, spu_nand( ZgtX, ZgtY ) );   // negTrace and not ( zz > xx and zz > yy )
+    largestYorZ = spu_and( negTrace, spu_or( YgtX, ZgtX ) );   // negTrace and ( yy > xx or zz > xx )
+    largestZorX = spu_and( negTrace, spu_orc( ZgtY, YgtX ) );   // negTrace and ( zz > yy or not yy > xx )
+    
+    zz = spu_sel( zz, negatef4(zz), largestXorY );   // sign flips under the masks; these feed the
+    xy = spu_sel( xy, negatef4(xy), largestXorY );   // radicand and the differences computed below
+    xx = spu_sel( xx, negatef4(xx), largestYorZ );
+    yz = spu_sel( yz, negatef4(yz), largestYorZ );
+    yy = spu_sel( yy, negatef4(yy), largestZorX );
+    zx = spu_sel( zx, negatef4(zx), largestZorX );
+
+    radicand = spu_add( spu_add( spu_add( xx, yy ), zz ), spu_splats(1.0f) );   // ( possibly sign-flipped ) diagonal sum + 1
+    scale = spu_mul( spu_splats(0.5f), rsqrtf4( radicand ) );   // 0.5 * rsqrtf4( radicand )
+
+    tmpx = spu_mul( spu_sub( zy, yz ), scale );
+    tmpy = spu_mul( spu_sub( xz, zx ), scale );
+    tmpz = spu_mul( spu_sub( yx, xy ), scale );
+    tmpw = spu_mul( radicand, scale );
+    qx = tmpx;   // layout used when none of the masks is set
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    qx = spu_sel( qx, tmpw, largestXorY );   // first permutation: under largestXorY the order becomes ( w, z, y, x )
+    qy = spu_sel( qy, tmpz, largestXorY );
+    qz = spu_sel( qz, tmpy, largestXorY );
+    qw = spu_sel( qw, tmpx, largestXorY );
+    tmpx = qx;   // save qx and qz: the second permutation overwrites them before they are read
+    tmpz = qz;
+    qx = spu_sel( qx, qy, largestYorZ );   // second permutation: under largestYorZ swap x<->y and z<->w
+    qy = spu_sel( qy, tmpx, largestYorZ );
+    qz = spu_sel( qz, qw, largestYorZ );
+    qw = spu_sel( qw, tmpz, largestYorZ );
+
+    mX = qx;
+    mY = qy;
+    mZ = qz;
+    mW = qw;
+}
+
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{
+    // Column j of the result is tfrm0 scaled by element j of tfrm1.
+    const Vector3 col0 = tfrm0 * tfrm1.getX( );
+    const Vector3 col1 = tfrm0 * tfrm1.getY( );
+    const Vector3 col2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{
+    // Column j of the result is tfrm0 scaled by element j of tfrm1.
+    const Vector4 col0 = tfrm0 * tfrm1.getX( );
+    const Vector4 col1 = tfrm0 * tfrm1.getY( );
+    const Vector4 col2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 col3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const vec_float4 vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();   // result element j combines vec with column j of mat
+    return Vector3(
+        spu_add( spu_add( spu_mul( vx, mat.getCol0().getX() ), spu_mul( vy, mat.getCol0().getY() ) ), spu_mul( vz, mat.getCol0().getZ() ) ),
+        spu_add( spu_add( spu_mul( vx, mat.getCol1().getX() ), spu_mul( vy, mat.getCol1().getY() ) ), spu_mul( vz, mat.getCol1().getZ() ) ),
+        spu_add( spu_add( spu_mul( vx, mat.getCol2().getX() ), spu_mul( vy, mat.getCol2().getY() ) ), spu_mul( vz, mat.getCol2().getZ() ) ) );
+}
+
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{
+    const vec_float4 zero = spu_splats(0.0f), x = vec.getX(), y = vec.getY(), z = vec.getZ();
+    const Vector3 col0( zero, z, negatef4( y ) );
+    const Vector3 col1( negatef4( z ), zero, x );
+    const Vector3 col2( y, negatef4( x ), zero );
+    return Matrix3( col0, col1, col2 );   // skew-symmetric: multiplying it by v expands to cross( vec, v )
+}
+
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
+{   // Column j of the result is cross( vec, column j of mat ).
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
+}
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_aos.h
index 88f30de00..a41bc69aa 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_aos.h
@@ -1,417 +1,417 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_CPP_H
-#define _VECTORMATH_QUAT_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Quat::Quat( float _x, float _y, float _z, float _w )
-{
-    mVec128 = (vec_float4){ _x, _y, _z, _w };
-}
-
-inline Quat::Quat( Vector3 xyz, float _w )
-{
-    mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
-}
-
-inline Quat::Quat( Vector4 vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Quat::Quat( float scalar )
-{
-    mVec128 = spu_splats( scalar );
-}
-
-inline Quat::Quat( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Quat lerp( float t, Quat quat0, Quat quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 )
-{
-    Quat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
-    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start = Quat( spu_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    return Quat( spu_madd( start.get128(), scale0, spu_mul( unitQuat1.get128(), scale1 ) ) );
-}
-
-inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
-}
-
-inline vec_float4 Quat::get128( ) const
-{
-    return mVec128;
-}
-
-inline Quat & Quat::operator =( Quat quat )
-{
-    mVec128 = quat.mVec128;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( Vector3 vec )
-{
-    mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) );
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Quat & Quat::setX( float _x )
-{
-    mVec128 = spu_insert( _x, mVec128, 0 );
-    return *this;
-}
-
-inline float Quat::getX( ) const
-{
-    return spu_extract( mVec128, 0 );
-}
-
-inline Quat & Quat::setY( float _y )
-{
-    mVec128 = spu_insert( _y, mVec128, 1 );
-    return *this;
-}
-
-inline float Quat::getY( ) const
-{
-    return spu_extract( mVec128, 1 );
-}
-
-inline Quat & Quat::setZ( float _z )
-{
-    mVec128 = spu_insert( _z, mVec128, 2 );
-    return *this;
-}
-
-inline float Quat::getZ( ) const
-{
-    return spu_extract( mVec128, 2 );
-}
-
-inline Quat & Quat::setW( float _w )
-{
-    mVec128 = spu_insert( _w, mVec128, 3 );
-    return *this;
-}
-
-inline float Quat::getW( ) const
-{
-    return spu_extract( mVec128, 3 );
-}
-
-inline Quat & Quat::setElem( int idx, float value )
-{
-    mVec128 = spu_insert( value, mVec128, idx );
-    return *this;
-}
-
-inline float Quat::getElem( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline VecIdx Quat::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline float Quat::operator []( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline const Quat Quat::operator +( Quat quat ) const
-{
-    return Quat( spu_add( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator -( Quat quat ) const
-{
-    return Quat( spu_sub( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator *( float scalar ) const
-{
-    return Quat( spu_mul( mVec128, spu_splats(scalar) ) );
-}
-
-inline Quat & Quat::operator +=( Quat quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( Quat quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( float scalar ) const
-{
-    return Quat( divf4( mVec128, spu_splats(scalar) ) );
-}
-
-inline Quat & Quat::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat( negatef4( mVec128 ) );
-}
-
-inline const Quat operator *( float scalar, Quat quat )
-{
-    return quat * scalar;
-}
-
-inline float dot( Quat quat0, Quat quat1 )
-{
-    return spu_extract( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
-}
-
-inline float norm( Quat quat )
-{
-    return spu_extract( _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
-}
-
-inline float length( Quat quat )
-{
-    return sqrtf( norm( quat ) );
-}
-
-inline const Quat normalize( Quat quat )
-{
-    vec_float4 dot = _vmathVfDot4( quat.get128(), quat.get128() );
-    return Quat( spu_mul( quat.get128(), rsqrtf4( dot ) ) );
-}
-
-inline const Quat Quat::rotation( Vector3 unitVec0, Vector3 unitVec1 )
-{
-    Vector3 crossVec;
-    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, (vec_uchar16)spu_splats(0x00010203) );
-    cosAngleX2Plus2 = spu_madd( cosAngle, spu_splats(2.0f), spu_splats(2.0f) );
-    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
-    cosHalfAngleX2 = spu_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
-    crossVec = cross( unitVec0, unitVec1 );
-    res = spu_mul( crossVec.get128(), recipCosHalfAngleX2 );
-    res = spu_sel( res, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ), (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotation( float radians, Vector3 unitVec )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_mul( unitVec.get128(), s ), c, (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationX( float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0xf000) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationY( float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x0f00) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationZ( float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x00f0) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::operator *( Quat quat ) const
-{
-    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = mVec128;
-    rdata = quat.mVec128;
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    tmp0 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_YZXW );
-    qv = spu_mul( spu_shuffle( ldata, ldata, shuffle_wwww ), rdata );
-    qv = spu_madd( spu_shuffle( rdata, rdata, shuffle_wwww ), ldata, qv );
-    qv = spu_madd( tmp0, tmp1, qv );
-    qv = spu_nmsub( tmp2, tmp3, qv );
-    product = spu_mul( ldata, rdata );
-    l_wxyz = spu_rlqwbyte( ldata, 12 );
-    r_wxyz = spu_rlqwbyte( rdata, 12 );
-    qw = spu_nmsub( l_wxyz, r_wxyz, product );
-    xy = spu_madd( l_wxyz, r_wxyz, product );
-    qw = spu_sub( qw, spu_rlqwbyte( xy, 8 ) );
-    return Quat( spu_sel( qv, qw, (vec_uint4)spu_maskb( 0x000f ) ) );
-}
-
-inline Quat & Quat::operator *=( Quat quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( Quat quat, Vector3 vec )
-{
-    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat.get128();
-    vdata = vec.get128();
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    tmp0 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_YZXW );
-    wwww = spu_shuffle( qdata, qdata, shuffle_wwww );
-    qv = spu_mul( wwww, vdata );
-    qv = spu_madd( tmp0, tmp1, qv );
-    qv = spu_nmsub( tmp2, tmp3, qv );
-    product = spu_mul( qdata, vdata );
-    qw = spu_madd( spu_rlqwbyte( qdata, 4 ), spu_rlqwbyte( vdata, 4 ), product );
-    qw = spu_add( spu_rlqwbyte( product, 8 ), qw );
-    tmp1 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_YZXW );
-    res = spu_mul( spu_shuffle( qw, qw, shuffle_xxxx ), qdata );
-    res = spu_madd( wwww, qv, res );
-    res = spu_madd( tmp0, tmp1, res );
-    res = spu_nmsub( tmp2, tmp3, res );
-    return Vector3( res );
-}
-
-inline const Quat conj( Quat quat )
-{
-    return Quat( spu_xor( quat.get128(), ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ) );
-}
-
-inline const Quat select( Quat quat0, Quat quat1, bool select1 )
-{
-    return Quat( spu_sel( quat0.get128(), quat1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Quat quat )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( Quat quat, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Quat::Quat( float _x, float _y, float _z, float _w ) // construct from four scalar components
+{
+    mVec128 = (vec_float4){ _x, _y, _z, _w }; // vector literal with elements 0..3 = _x, _y, _z, _w
+}
+
+inline Quat::Quat( Vector3 xyz, float _w ) // construct from a 3-D vector part and a scalar part
+{
+    mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA ); // _w is promoted into element 0 of a second vector; the shuffle pattern (defined elsewhere) presumably picks x,y,z from xyz and that element -- confirm
+}
+
+inline Quat::Quat( Vector4 vec ) // construct by copying the 128-bit value of a Vector4
+{
+    mVec128 = vec.get128();
+}
+
+inline Quat::Quat( float scalar ) // construct with every component set to the same scalar
+{
+    mVec128 = spu_splats( scalar ); // replicate scalar across the vector
+}
+
+inline Quat::Quat( vec_float4 vf4 ) // construct directly from a raw vec_float4
+{
+    mVec128 = vf4;
+}
+
+inline const Quat Quat::identity( ) // returns the identity quaternion
+{
+    return Quat( _VECTORMATH_UNIT_0001 ); // constant defined elsewhere; presumably (0, 0, 0, 1) -- confirm
+}
+
+inline const Quat lerp( float t, Quat quat0, Quat quat1 ) // component-wise linear interpolation; the result is not renormalized here
+{
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) ); // quat0 at t = 0, quat1 at t = 1
+}
+
+inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 ) // spherical interpolation between two quaternions (named as unit-length; not checked here) by parameter t
+{
+    Quat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // shuffle pattern: bytes 0-3 repeated, i.e. broadcast element 0
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); // shuffle pattern: bytes 4-7 repeated, i.e. broadcast element 1
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); // shuffle pattern: bytes 8-11 repeated, i.e. broadcast element 2
+    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() ); // 4-component dot product (helper defined elsewhere)
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx ); // broadcast element 0 of the dot result to all elements
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle ); // set where cosAngle < 0
+    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask ); // negate a negative cosine so it becomes non-negative
+    start = Quat( spu_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) ); // in that same case start from -unitQuat0 instead of unitQuat0
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); // set where cosAngle < _VECTORMATH_SLERP_TOL: sine-based weights are used below
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) ); // element 1 <- (1 - t), others 1
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) ); // element 2 <- t, giving ( 1, 1-t, t, 1 )
+    angles = spu_mul( angles, angle ); // ( angle, (1-t)*angle, t*angle, angle )
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) ); // every sine divided by sin(angle)
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask ); // sin((1-t)*angle)/sin(angle) where the mask is set, otherwise the linear weight (1 - t)
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask ); // sin(t*angle)/sin(angle) where the mask is set, otherwise the linear weight t
+    return Quat( spu_madd( start.get128(), scale0, spu_mul( unitQuat1.get128(), scale1 ) ) ); // start*scale0 + unitQuat1*scale1
+}
+
+inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 ) // spherical quadrangle interpolation built from three slerp calls
+{
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 ); // interpolate between the first and last arguments
+    tmp1 = slerp( t, unitQuat1, unitQuat2 ); // interpolate between the two middle arguments
+    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 ); // blend the two results with parameter 2t(1 - t)
+}
+
+inline vec_float4 Quat::get128( ) const // returns the underlying 128-bit vector by value
+{
+    return mVec128;
+}
+
+inline Quat & Quat::operator =( Quat quat ) // assignment: copies the 128-bit value of quat
+{
+    mVec128 = quat.mVec128;
+    return *this; // allows chained assignment
+}
+
+inline Quat & Quat::setXYZ( Vector3 vec ) // replaces x, y, z with those of vec and keeps the current w
+{
+    mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) ); // mask covers the last four bytes (element 3): that element comes from mVec128, the rest from vec
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const // returns the vector part as a Vector3
+{
+    return Vector3( mVec128 ); // the whole 128-bit value is handed to Vector3; element 3 is not cleared here
+}
+
+inline Quat & Quat::setX( float _x ) // sets the x component
+{
+    mVec128 = spu_insert( _x, mVec128, 0 ); // write element 0
+    return *this;
+}
+
+inline float Quat::getX( ) const // returns the x component
+{
+    return spu_extract( mVec128, 0 ); // read element 0
+}
+
+inline Quat & Quat::setY( float _y ) // sets the y component
+{
+    mVec128 = spu_insert( _y, mVec128, 1 ); // write element 1
+    return *this;
+}
+
+inline float Quat::getY( ) const // returns the y component
+{
+    return spu_extract( mVec128, 1 ); // read element 1
+}
+
+inline Quat & Quat::setZ( float _z ) // sets the z component
+{
+    mVec128 = spu_insert( _z, mVec128, 2 ); // write element 2
+    return *this;
+}
+
+inline float Quat::getZ( ) const // returns the z component
+{
+    return spu_extract( mVec128, 2 ); // read element 2
+}
+
+inline Quat & Quat::setW( float _w ) // sets the w component
+{
+    mVec128 = spu_insert( _w, mVec128, 3 ); // write element 3
+    return *this;
+}
+
+inline float Quat::getW( ) const // returns the w component
+{
+    return spu_extract( mVec128, 3 ); // read element 3
+}
+
+inline Quat & Quat::setElem( int idx, float value ) // sets the component at position idx (the named accessors use 0..3 for x..w)
+{
+    mVec128 = spu_insert( value, mVec128, idx ); // idx is passed through without a range check
+    return *this;
+}
+
+inline float Quat::getElem( int idx ) const // returns the component at position idx (the named accessors use 0..3 for x..w)
+{
+    return spu_extract( mVec128, idx ); // idx is passed through without a range check
+}
+
+inline VecIdx Quat::operator []( int idx ) // non-const subscript: returns a VecIdx instead of a float reference
+{
+    return VecIdx( mVec128, idx ); // VecIdx is declared elsewhere; presumably a proxy allowing assignment to element idx -- confirm
+}
+
+inline float Quat::operator []( int idx ) const // const subscript: returns the component at position idx by value
+{
+    return spu_extract( mVec128, idx ); // idx is passed through without a range check
+}
+
+inline const Quat Quat::operator +( Quat quat ) const // component-wise sum of two quaternions
+{
+    return Quat( spu_add( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator -( Quat quat ) const // component-wise difference of two quaternions
+{
+    return Quat( spu_sub( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator *( float scalar ) const // multiplies every component by scalar
+{
+    return Quat( spu_mul( mVec128, spu_splats(scalar) ) ); // scalar is replicated across a vector first
+}
+
+inline Quat & Quat::operator +=( Quat quat ) // in-place component-wise addition
+{
+    *this = *this + quat; // implemented via operator + and assignment
+    return *this;
+}
+
+inline Quat & Quat::operator -=( Quat quat ) // in-place component-wise subtraction
+{
+    *this = *this - quat; // implemented via binary operator - and assignment
+    return *this;
+}
+
+inline Quat & Quat::operator *=( float scalar ) // in-place multiplication of every component by scalar
+{
+    *this = *this * scalar; // implemented via operator *( float ) and assignment
+    return *this;
+}
+
+inline const Quat Quat::operator /( float scalar ) const // divides every component by scalar
+{
+    return Quat( divf4( mVec128, spu_splats(scalar) ) ); // no check for scalar == 0 is made here
+}
+
+inline Quat & Quat::operator /=( float scalar ) // in-place division of every component by scalar
+{
+    *this = *this / scalar; // implemented via operator / and assignment
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const // unary minus: negates the whole 128-bit value via negatef4
+{
+    return Quat( negatef4( mVec128 ) );
+}
+
+inline const Quat operator *( float scalar, Quat quat ) // scalar-on-the-left multiplication
+{
+    return quat * scalar; // forwards to Quat::operator *( float )
+}
+
+inline float dot( Quat quat0, Quat quat1 ) // dot product of two quaternions as a scalar
+{
+    return spu_extract( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 ); // helper (defined elsewhere) returns a vector; the scalar result is read from element 0
+}
+
+inline float norm( Quat quat ) // dot product of quat with itself, i.e. the squared length (length() takes sqrtf of this)
+{
+    return spu_extract( _vmathVfDot4( quat.get128(), quat.get128() ), 0 ); // scalar result is read from element 0
+}
+
+inline float length( Quat quat ) // length of the quaternion
+{
+    return sqrtf( norm( quat ) ); // square root of the self dot product
+}
+
+inline const Quat normalize( Quat quat ) // scales quat by the reciprocal square root of its self dot product
+{
+    vec_float4 dot = _vmathVfDot4( quat.get128(), quat.get128() ); // NOTE(review): the multiply below needs this value in every element -- confirm _vmathVfDot4 broadcasts its result
+    return Quat( spu_mul( quat.get128(), rsqrtf4( dot ) ) ); // no guard for a zero-length input in this block
+}
+
+inline const Quat Quat::rotation( Vector3 unitVec0, Vector3 unitVec1 ) // quaternion built from two direction vectors (named as unit-length; not checked here)
+{
+    Vector3 crossVec;
+    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); // 3-component dot product (helper defined elsewhere)
+    cosAngle = spu_shuffle( cosAngle, cosAngle, (vec_uchar16)spu_splats(0x00010203) ); // broadcast element 0 to all elements
+    cosAngleX2Plus2 = spu_madd( cosAngle, spu_splats(2.0f), spu_splats(2.0f) ); // 2*cos + 2, which equals (2*cos(angle/2))^2
+    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 ); // 1 / (2*cos(angle/2)); NOTE(review): the argument is 0 when unitVec0 == -unitVec1, which is not handled here
+    cosHalfAngleX2 = spu_mul( recipCosHalfAngleX2, cosAngleX2Plus2 ); // x * rsqrt(x) = sqrt(x) = 2*cos(angle/2)
+    crossVec = cross( unitVec0, unitVec1 );
+    res = spu_mul( crossVec.get128(), recipCosHalfAngleX2 ); // vector part: cross product scaled by 1 / (2*cos(angle/2))
+    res = spu_sel( res, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ), (vec_uint4)spu_maskb(0x000f) ); // element 3 (w) <- cos(angle/2)
+    return Quat( res );
+}
+
+inline const Quat Quat::rotation( float radians, Vector3 unitVec ) // quaternion from an angle and an axis (named as unit-length; not checked here)
+{
+    vec_float4 s, c, angle, res;
+    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) ); // half the angle, replicated across the vector
+    sincosf4( angle, &s, &c ); // s and c receive the sine and cosine of the half angle
+    res = spu_sel( spu_mul( unitVec.get128(), s ), c, (vec_uint4)spu_maskb(0x000f) ); // x,y,z <- unitVec * sin(half); element 3 (w) <- cos(half)
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationX( float radians ) // quaternion ( sin(radians/2), 0, 0, cos(radians/2) )
+{
+    vec_float4 s, c, angle, res;
+    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) ); // half the angle, replicated across the vector
+    sincosf4( angle, &s, &c ); // s and c receive the sine and cosine of the half angle
+    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0xf000) ); // element 0 (x) <- sin(half), others 0
+    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) ); // element 3 (w) <- cos(half)
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationY( float radians ) // quaternion ( 0, sin(radians/2), 0, cos(radians/2) )
+{
+    vec_float4 s, c, angle, res;
+    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) ); // half the angle, replicated across the vector
+    sincosf4( angle, &s, &c ); // s and c receive the sine and cosine of the half angle
+    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x0f00) ); // element 1 (y) <- sin(half), others 0
+    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) ); // element 3 (w) <- cos(half)
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationZ( float radians ) // quaternion ( 0, 0, sin(radians/2), cos(radians/2) )
+{
+    vec_float4 s, c, angle, res;
+    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) ); // half the angle, replicated across the vector
+    sincosf4( angle, &s, &c ); // s and c receive the sine and cosine of the half angle
+    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x00f0) ); // element 2 (z) <- sin(half), others 0
+    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) ); // element 3 (w) <- cos(half)
+    return Quat( res );
+}
+
+inline const Quat Quat::operator *( Quat quat ) const // quaternion product with *this on the left and quat on the right
+{
+    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = mVec128;
+    rdata = quat.mVec128;
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); // shuffle pattern: bytes 12-15 repeated, i.e. broadcast element 3 (w)
+    tmp0 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_YZXW ); // shuffle patterns are defined elsewhere; per their names these four lines
+    tmp1 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_ZXYW ); // produce the (y,z,x,w) and (z,x,y,w) reorderings of each operand,
+    tmp2 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_ZXYW ); // which are the inputs of the cross-product terms below
+    tmp3 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_YZXW ); // (presumed from the macro names -- confirm)
+    qv = spu_mul( spu_shuffle( ldata, ldata, shuffle_wwww ), rdata ); // l.w * r
+    qv = spu_madd( spu_shuffle( rdata, rdata, shuffle_wwww ), ldata, qv ); // + r.w * l
+    qv = spu_madd( tmp0, tmp1, qv ); // + l.yzx * r.zxy
+    qv = spu_nmsub( tmp2, tmp3, qv ); // - l.zxy * r.yzx; x,y,z now hold the vector part of the product
+    product = spu_mul( ldata, rdata ); // component-wise ( lx*rx, ly*ry, lz*rz, lw*rw )
+    l_wxyz = spu_rlqwbyte( ldata, 12 ); // rotate left by 12 bytes: ( w, x, y, z )
+    r_wxyz = spu_rlqwbyte( rdata, 12 );
+    qw = spu_nmsub( l_wxyz, r_wxyz, product ); // product - l_wxyz*r_wxyz; element 3 = lw*rw - lz*rz
+    xy = spu_madd( l_wxyz, r_wxyz, product ); // l_wxyz*r_wxyz + product; element 1 = lx*rx + ly*ry
+    qw = spu_sub( qw, spu_rlqwbyte( xy, 8 ) ); // rotating xy by 8 bytes moves its element 1 to element 3: w = lw*rw - lx*rx - ly*ry - lz*rz
+    return Quat( spu_sel( qv, qw, (vec_uint4)spu_maskb( 0x000f ) ) ); // x,y,z from qv, w from qw
+}
+
+inline Quat & Quat::operator *=( Quat quat ) // in-place quaternion product; quat is the right-hand operand
+{
+    *this = *this * quat; // implemented via operator *( Quat ) and assignment
+    return *this;
+}
+
+inline const Vector3 rotate( Quat quat, Vector3 vec ) // applies quat to vec; presumably expects a unit quaternion (no normalization is done here) -- confirm
+{
+    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat.get128();
+    vdata = vec.get128();
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // shuffle pattern: broadcast element 0
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); // shuffle pattern: broadcast element 3 (w)
+    tmp0 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_YZXW ); // shuffle patterns are defined elsewhere; per their names these give
+    tmp1 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_ZXYW ); // the (y,z,x,w) / (z,x,y,w) reorderings used for cross-product terms
+    tmp2 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_ZXYW ); // (presumed from the macro names -- confirm)
+    tmp3 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_YZXW );
+    wwww = spu_shuffle( qdata, qdata, shuffle_wwww ); // q.w in every element
+    qv = spu_mul( wwww, vdata ); // q.w * v
+    qv = spu_madd( tmp0, tmp1, qv ); // + q.yzx * v.zxy
+    qv = spu_nmsub( tmp2, tmp3, qv ); // - q.zxy * v.yzx, so x,y,z hold q.w*v + cross(q.xyz, v)
+    product = spu_mul( qdata, vdata ); // component-wise q * v
+    qw = spu_madd( spu_rlqwbyte( qdata, 4 ), spu_rlqwbyte( vdata, 4 ), product ); // element 0 = q.y*v.y + q.x*v.x
+    qw = spu_add( spu_rlqwbyte( product, 8 ), qw ); // element 0 += q.z*v.z, giving dot(q.xyz, v)
+    tmp1 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_ZXYW ); // tmp1/tmp3 are reused for the reorderings of qv
+    tmp3 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_YZXW );
+    res = spu_mul( spu_shuffle( qw, qw, shuffle_xxxx ), qdata ); // dot(q.xyz, v) * q
+    res = spu_madd( wwww, qv, res ); // + q.w * qv
+    res = spu_madd( tmp0, tmp1, res ); // + q.yzx * qv.zxy
+    res = spu_nmsub( tmp2, tmp3, res ); // - q.zxy * qv.yzx, i.e. + cross(q.xyz, qv)
+    return Vector3( res ); // whole 128-bit value is handed to Vector3; element 3 is not cleared here
+}
+
+inline const Quat conj( Quat quat ) // conjugate: negates x, y, z and leaves w unchanged
+{
+    return Quat( spu_xor( quat.get128(), ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ) ); // XOR with 0x80000000 flips the sign bit of elements 0..2; element 3 is XORed with 0
+}
+
+inline const Quat select( Quat quat0, Quat quat1, bool select1 ) // returns quat1 when select1 is true, otherwise quat0, without branching
+{
+    return Quat( spu_sel( quat0.get128(), quat1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) ); // -(true) converts to an all-ones mask, -(false) to zero; spu_sel takes quat1 bits where the mask is set
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Quat quat ) // debug helper (compiled only under _VECTORMATH_DEBUG): prints the four components via printf
+{
+    union { vec_float4 v; float s[4]; } tmp; // union used to read the vector's elements as plain floats
+    tmp.v = quat.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); // elements 0..3 in order
+}
+
+inline void print( Quat quat, const char * name ) // debug helper (compiled only under _VECTORMATH_DEBUG): prints name followed by the four components
+{
+    union { vec_float4 v; float s[4]; } tmp; // union used to read the vector's elements as plain floats
+    tmp.v = quat.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); // name is passed to %s as-is; it is not checked for null here
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_soa.h
index 675457f04..88c2884fa 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_soa.h
@@ -1,483 +1,483 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_CPP_H
-#define _VECTORMATH_QUAT_SOA_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Soa {
-
-inline Quat::Quat( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-}
-
-inline Quat::Quat( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Quat::Quat( const Vector3 & xyz, vec_float4 _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Quat::Quat( const Vector4 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = vec.getW();
-}
-
-inline Quat::Quat( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline Quat::Quat( Aos::Quat quat )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_float4 vec128 = quat.get128();
-    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
-    mW = spu_shuffle( vec128, vec128, shuffle_wwww );
-}
-
-inline Quat::Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( quat0.get128(), quat2.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( quat1.get128(), quat3.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( quat0.get128(), quat2.get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( quat1.get128(), quat3.get128(), _VECTORMATH_SHUF_ZCWD );
-    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    mW = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 )
-{
-    Quat start;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitQuat0, unitQuat1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
-    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start.setX( spu_sel( unitQuat0.getX(), negatef4( unitQuat0.getX() ), selectMask ) );
-    start.setY( spu_sel( unitQuat0.getY(), negatef4( unitQuat0.getY() ), selectMask ) );
-    start.setZ( spu_sel( unitQuat0.getZ(), negatef4( unitQuat0.getZ() ), selectMask ) );
-    start.setW( spu_sel( unitQuat0.getW(), negatef4( unitQuat0.getW() ), selectMask ) );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
-}
-
-inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( spu_mul( spu_mul( spu_splats(2.0f), t ), spu_sub( spu_splats(1.0f), t ) ), tmp0, tmp1 );
-}
-
-inline void Quat::get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_ZCWD );
-    result0 = Aos::Quat( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
-    result1 = Aos::Quat( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
-    result2 = Aos::Quat( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
-    result3 = Aos::Quat( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
-}
-
-inline Quat & Quat::operator =( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Quat & Quat::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Quat::getX( ) const
-{
-    return mX;
-}
-
-inline Quat & Quat::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Quat::getY( ) const
-{
-    return mY;
-}
-
-inline Quat & Quat::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Quat::getZ( ) const
-{
-    return mZ;
-}
-
-inline Quat & Quat::setW( vec_float4 _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline vec_float4 Quat::getW( ) const
-{
-    return mW;
-}
-
-inline Quat & Quat::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Quat::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Quat::vec_float4_t & Quat::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Quat::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Quat Quat::operator +( const Quat & quat ) const
-{
-    return Quat(
-        spu_add( mX, quat.mX ),
-        spu_add( mY, quat.mY ),
-        spu_add( mZ, quat.mZ ),
-        spu_add( mW, quat.mW )
-    );
-}
-
-inline const Quat Quat::operator -( const Quat & quat ) const
-{
-    return Quat(
-        spu_sub( mX, quat.mX ),
-        spu_sub( mY, quat.mY ),
-        spu_sub( mZ, quat.mZ ),
-        spu_sub( mW, quat.mW )
-    );
-}
-
-inline const Quat Quat::operator *( vec_float4 scalar ) const
-{
-    return Quat(
-        spu_mul( mX, scalar ),
-        spu_mul( mY, scalar ),
-        spu_mul( mZ, scalar ),
-        spu_mul( mW, scalar )
-    );
-}
-
-inline Quat & Quat::operator +=( const Quat & quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( const Quat & quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( vec_float4 scalar ) const
-{
-    return Quat(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar ),
-        divf4( mW, scalar )
-    );
-}
-
-inline Quat & Quat::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ ),
-        negatef4( mW )
-    );
-}
-
-inline const Quat operator *( vec_float4 scalar, const Quat & quat )
-{
-    return quat * scalar;
-}
-
-inline vec_float4 dot( const Quat & quat0, const Quat & quat1 )
-{
-    vec_float4 result;
-    result = spu_mul( quat0.getX(), quat1.getX() );
-    result = spu_add( result, spu_mul( quat0.getY(), quat1.getY() ) );
-    result = spu_add( result, spu_mul( quat0.getZ(), quat1.getZ() ) );
-    result = spu_add( result, spu_mul( quat0.getW(), quat1.getW() ) );
-    return result;
-}
-
-inline vec_float4 norm( const Quat & quat )
-{
-    vec_float4 result;
-    result = spu_mul( quat.getX(), quat.getX() );
-    result = spu_add( result, spu_mul( quat.getY(), quat.getY() ) );
-    result = spu_add( result, spu_mul( quat.getZ(), quat.getZ() ) );
-    result = spu_add( result, spu_mul( quat.getW(), quat.getW() ) );
-    return result;
-}
-
-inline vec_float4 length( const Quat & quat )
-{
-    return sqrtf4( norm( quat ) );
-}
-
-inline const Quat normalize( const Quat & quat )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = norm( quat );
-    lenInv = rsqrtf4( lenSqr );
-    return Quat(
-        spu_mul( quat.getX(), lenInv ),
-        spu_mul( quat.getY(), lenInv ),
-        spu_mul( quat.getZ(), lenInv ),
-        spu_mul( quat.getW(), lenInv )
-    );
-}
-
-inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf4( spu_mul( spu_splats(2.0f), spu_add( spu_splats(1.0f), dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = recipf4( cosHalfAngleX2 );
-    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), spu_mul( cosHalfAngleX2, spu_splats(0.5f) ) );
-}
-
-inline const Quat Quat::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    return Quat( ( unitVec * s ), c );
-}
-
-inline const Quat Quat::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    return Quat( s, spu_splats(0.0f), spu_splats(0.0f), c );
-}
-
-inline const Quat Quat::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    return Quat( spu_splats(0.0f), s, spu_splats(0.0f), c );
-}
-
-inline const Quat Quat::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    return Quat( spu_splats(0.0f), spu_splats(0.0f), s, c );
-}
-
-inline const Quat Quat::operator *( const Quat & quat ) const
-{
-    return Quat(
-        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mX ), spu_mul( mX, quat.mW ) ), spu_mul( mY, quat.mZ ) ), spu_mul( mZ, quat.mY ) ),
-        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mY ), spu_mul( mY, quat.mW ) ), spu_mul( mZ, quat.mX ) ), spu_mul( mX, quat.mZ ) ),
-        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mZ ), spu_mul( mZ, quat.mW ) ), spu_mul( mX, quat.mY ) ), spu_mul( mY, quat.mX ) ),
-        spu_sub( spu_sub( spu_sub( spu_mul( mW, quat.mW ), spu_mul( mX, quat.mX ) ), spu_mul( mY, quat.mY ) ), spu_mul( mZ, quat.mZ ) )
-    );
-}
-
-inline Quat & Quat::operator *=( const Quat & quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = spu_sub( spu_add( spu_mul( quat.getW(), vec.getX() ), spu_mul( quat.getY(), vec.getZ() ) ), spu_mul( quat.getZ(), vec.getY() ) );
-    tmpY = spu_sub( spu_add( spu_mul( quat.getW(), vec.getY() ), spu_mul( quat.getZ(), vec.getX() ) ), spu_mul( quat.getX(), vec.getZ() ) );
-    tmpZ = spu_sub( spu_add( spu_mul( quat.getW(), vec.getZ() ), spu_mul( quat.getX(), vec.getY() ) ), spu_mul( quat.getY(), vec.getX() ) );
-    tmpW = spu_add( spu_add( spu_mul( quat.getX(), vec.getX() ), spu_mul( quat.getY(), vec.getY() ) ), spu_mul( quat.getZ(), vec.getZ() ) );
-    return Vector3(
-        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getX() ), spu_mul( tmpX, quat.getW() ) ), spu_mul( tmpY, quat.getZ() ) ), spu_mul( tmpZ, quat.getY() ) ),
-        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getY() ), spu_mul( tmpY, quat.getW() ) ), spu_mul( tmpZ, quat.getX() ) ), spu_mul( tmpX, quat.getZ() ) ),
-        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getZ() ), spu_mul( tmpZ, quat.getW() ) ), spu_mul( tmpX, quat.getY() ) ), spu_mul( tmpY, quat.getX() ) )
-    );
-}
-
-inline const Quat conj( const Quat & quat )
-{
-    return Quat( negatef4( quat.getX() ), negatef4( quat.getY() ), negatef4( quat.getZ() ), quat.getW() );
-}
-
-inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 )
-{
-    return Quat(
-        spu_sel( quat0.getX(), quat1.getX(), select1 ),
-        spu_sel( quat0.getY(), quat1.getY(), select1 ),
-        spu_sel( quat0.getZ(), quat1.getZ(), select1 ),
-        spu_sel( quat0.getW(), quat1.getW(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Quat & quat )
-{
-    Aos::Quat vec0, vec1, vec2, vec3;
-    quat.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Quat & quat, const char * name )
-{
-    Aos::Quat vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    quat.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_CPP_H
+#define _VECTORMATH_QUAT_SOA_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Soa {
+
+inline Quat::Quat( const Quat & quat ) // Copy constructor: copies mX, mY, mZ and mW from quat.
+{
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+}
+
+inline Quat::Quat( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w ) // Builds a quaternion from four component vectors, stored as mX, mY, mZ and mW.
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Quat::Quat( const Vector3 & xyz, vec_float4 _w ) // Builds a quaternion from a Vector3 (x, y, z) and a separate w component.
+{
+    this->setXYZ( xyz ); // fills mX, mY, mZ from the vector's getters
+    this->setW( _w );
+}
+
+inline Quat::Quat( const Vector4 & vec ) // Builds a quaternion whose components are the X, Y, Z and W of vec.
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = vec.getW();
+}
+
+inline Quat::Quat( vec_float4 scalar ) // Builds a quaternion with all four components set to the same vector value.
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline Quat::Quat( Aos::Quat quat ) // Builds an SoA quaternion from one AoS quaternion by replicating each of its words across a whole component vector.
+{
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // byte indices 0-3 in every word: picks word 0
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); // byte indices 4-7 in every word: picks word 1
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); // byte indices 8-11 in every word: picks word 2
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); // byte indices 12-15 in every word: picks word 3
+    vec_float4 vec128 = quat.get128();
+    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
+    mW = spu_shuffle( vec128, vec128, shuffle_wwww );
+}
+
+inline Quat::Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 ) // Builds an SoA quaternion from four AoS quaternions with two rounds of interleaving shuffles (a 4x4 transpose, assuming the _VECTORMATH_SHUF_* names spell the words they select).
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( quat0.get128(), quat2.get128(), _VECTORMATH_SHUF_XAYB ); // ( q0.x, q2.x, q0.y, q2.y )
+    tmp1 = spu_shuffle( quat1.get128(), quat3.get128(), _VECTORMATH_SHUF_XAYB ); // ( q1.x, q3.x, q1.y, q3.y )
+    tmp2 = spu_shuffle( quat0.get128(), quat2.get128(), _VECTORMATH_SHUF_ZCWD ); // ( q0.z, q2.z, q0.w, q2.w )
+    tmp3 = spu_shuffle( quat1.get128(), quat3.get128(), _VECTORMATH_SHUF_ZCWD ); // ( q1.z, q3.z, q1.w, q3.w )
+    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ); // x of quat0..quat3
+    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ); // y of quat0..quat3
+    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ); // z of quat0..quat3
+    mW = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ); // w of quat0..quat3
+}
+
+inline const Quat Quat::identity( ) // Returns the quaternion with x, y, z splatted to 0.0f and w splatted to 1.0f.
+{
+    return Quat( spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 ) // Linear interpolation: returns quat0 + (quat1 - quat0) * t; the result is not normalized here.
+{
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 ) // Blends unitQuat0 and unitQuat1 by t with sine-based weights; slots whose cosine is not below _VECTORMATH_SLERP_TOL use linear weights instead.
+{
+    Quat start;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitQuat0, unitQuat1 );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle ); // mask set in slots where 0 > cosAngle
+    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask ); // flip the sign of cosAngle in the masked slots
+    start.setX( spu_sel( unitQuat0.getX(), negatef4( unitQuat0.getX() ), selectMask ) ); // start = unitQuat0, negated in the same masked slots
+    start.setY( spu_sel( unitQuat0.getY(), negatef4( unitQuat0.getY() ), selectMask ) );
+    start.setZ( spu_sel( unitQuat0.getZ(), negatef4( unitQuat0.getZ() ), selectMask ) );
+    start.setW( spu_sel( unitQuat0.getW(), negatef4( unitQuat0.getW() ), selectMask ) );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); // mask reused: now set where cosAngle < _VECTORMATH_SLERP_TOL
+    angle = acosf4( cosAngle );
+    recipSinAngle = recipf4( sinf4( angle ) ); // evaluated for every slot, but only consumed where the mask is set
+    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask ); // masked: sin((1-t)*angle)/sin(angle); unmasked: 1-t
+    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask ); // masked: sin(t*angle)/sin(angle); unmasked: t
+    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
+}
+
+inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 ) // Combines three slerp calls: two at parameter t, then one between their results at 2*t*(1-t).
+{
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 ); // outer pair
+    tmp1 = slerp( t, unitQuat1, unitQuat2 ); // inner pair
+    return slerp( spu_mul( spu_mul( spu_splats(2.0f), t ), spu_sub( spu_splats(1.0f), t ) ), tmp0, tmp1 ); // blend parameter is 2*t*(1-t)
+}
+
+inline void Quat::get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const // Writes the four slots of this SoA quaternion out as four AoS quaternions (assuming the _VECTORMATH_SHUF_* names spell the words they select).
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB ); // ( x0, z0, x1, z1 )
+    tmp1 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_XAYB ); // ( y0, w0, y1, w1 )
+    tmp2 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD ); // ( x2, z2, x3, z3 )
+    tmp3 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_ZCWD ); // ( y2, w2, y3, w3 )
+    result0 = Aos::Quat( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) ); // slot 0: ( x0, y0, z0, w0 )
+    result1 = Aos::Quat( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) ); // slot 1
+    result2 = Aos::Quat( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) ); // slot 2
+    result3 = Aos::Quat( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) ); // slot 3
+}
+
+inline Quat & Quat::operator =( const Quat & quat ) // Assignment: copies the four component vectors from quat and returns *this.
+{
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( const Vector3 & vec ) // Overwrites mX, mY, mZ from vec and returns *this; mW is left untouched.
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const // Returns a Vector3 built from mX, mY and mZ.
+{
+    return Vector3( mX, mY, mZ );
+}
+
+inline Quat & Quat::setX( vec_float4 _x ) // Replaces the x component vector and returns *this so calls can be chained.
+{
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Quat::getX( ) const // Returns the x component vector (mX) by value.
+{
+    return mX;
+}
+
+inline Quat & Quat::setY( vec_float4 _y ) // Replaces the y component vector and returns *this so calls can be chained.
+{
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Quat::getY( ) const // Returns the y component vector (mY) by value.
+{
+    return mY;
+}
+
+inline Quat & Quat::setZ( vec_float4 _z ) // Replaces the z component vector and returns *this so calls can be chained.
+{
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Quat::getZ( ) const // Returns the z component vector (mZ) by value.
+{
+    return mZ;
+}
+
+inline Quat & Quat::setW( vec_float4 _w ) // Replaces the w component vector and returns *this so calls can be chained.
+{
+    mW = _w;
+    return *this;
+}
+
+inline vec_float4 Quat::getW( ) const // Returns the w component vector (mW) by value.
+{
+    return mW;
+}
+
+inline Quat & Quat::setElem( int idx, vec_float4 value ) // Stores value into the component selected by idx and returns *this.
+{
+    *(&mX + idx) = value; // addresses components by offset from mX; idx is not range-checked. NOTE(review): relies on mX..mW being declared contiguously in that order - confirm against the class declaration
+    return *this;
+}
+
+inline vec_float4 Quat::getElem( int idx ) const // Returns the component vector selected by idx.
+{
+    return *(&mX + idx); // offset from mX; idx is not range-checked. NOTE(review): relies on mX..mW being contiguous in declaration order - confirm
+}
+
+inline Quat::vec_float4_t & Quat::operator []( int idx ) // Mutable subscript: returns a reference to the component vector selected by idx.
+{
+    return *(&mX + idx); // offset from mX; idx is not range-checked. NOTE(review): relies on mX..mW being contiguous in declaration order - confirm
+}
+
+inline vec_float4 Quat::operator []( int idx ) const // Read-only subscript: returns a copy of the component vector selected by idx.
+{
+    return *(&mX + idx); // offset from mX; idx is not range-checked. NOTE(review): relies on mX..mW being contiguous in declaration order - confirm
+}
+
+inline const Quat Quat::operator +( const Quat & quat ) const // Component-wise sum of *this and quat, returned as a new Quat.
+{
+    return Quat(
+        spu_add( mX, quat.mX ),
+        spu_add( mY, quat.mY ),
+        spu_add( mZ, quat.mZ ),
+        spu_add( mW, quat.mW )
+    );
+}
+
+inline const Quat Quat::operator -( const Quat & quat ) const // Component-wise difference (*this minus quat), returned as a new Quat.
+{
+    return Quat(
+        spu_sub( mX, quat.mX ),
+        spu_sub( mY, quat.mY ),
+        spu_sub( mZ, quat.mZ ),
+        spu_sub( mW, quat.mW )
+    );
+}
+
+inline const Quat Quat::operator *( vec_float4 scalar ) const // Multiplies each of the four component vectors by scalar and returns the result.
+{
+    return Quat(
+        spu_mul( mX, scalar ),
+        spu_mul( mY, scalar ),
+        spu_mul( mZ, scalar ),
+        spu_mul( mW, scalar )
+    );
+}
+
+inline Quat & Quat::operator +=( const Quat & quat ) // In-place addition, implemented through operator + and assignment.
+{
+    *this = *this + quat;
+    return *this;
+}
+
+inline Quat & Quat::operator -=( const Quat & quat ) // In-place subtraction, implemented through binary operator - and assignment.
+{
+    *this = *this - quat;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( vec_float4 scalar ) // In-place scaling, implemented through operator *( vec_float4 ) and assignment.
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator /( vec_float4 scalar ) const // Divides each of the four component vectors by scalar using divf4; no zero check is made here.
+{
+    return Quat(
+        divf4( mX, scalar ),
+        divf4( mY, scalar ),
+        divf4( mZ, scalar ),
+        divf4( mW, scalar )
+    );
+}
+
+inline Quat & Quat::operator /=( vec_float4 scalar ) // In-place division, implemented through operator / and assignment.
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const // Unary minus: applies negatef4 to all four component vectors (w included).
+{
+    return Quat(
+        negatef4( mX ),
+        negatef4( mY ),
+        negatef4( mZ ),
+        negatef4( mW )
+    );
+}
+
+inline const Quat operator *( vec_float4 scalar, const Quat & quat ) // Scalar-on-the-left multiply; forwards to the member quat * scalar.
+{
+    return quat * scalar;
+}
+
+inline vec_float4 dot( const Quat & quat0, const Quat & quat1 ) // Returns x0*x1 + y0*y1 + z0*z1 + w0*w1 as a vec_float4, accumulated in that order.
+{
+    vec_float4 result;
+    result = spu_mul( quat0.getX(), quat1.getX() );
+    result = spu_add( result, spu_mul( quat0.getY(), quat1.getY() ) );
+    result = spu_add( result, spu_mul( quat0.getZ(), quat1.getZ() ) );
+    result = spu_add( result, spu_mul( quat0.getW(), quat1.getW() ) );
+    return result;
+}
+
+inline vec_float4 norm( const Quat & quat ) // Returns x*x + y*y + z*z + w*w; no square root is taken here (length() applies sqrtf4 to this value).
+{
+    vec_float4 result;
+    result = spu_mul( quat.getX(), quat.getX() );
+    result = spu_add( result, spu_mul( quat.getY(), quat.getY() ) );
+    result = spu_add( result, spu_mul( quat.getZ(), quat.getZ() ) );
+    result = spu_add( result, spu_mul( quat.getW(), quat.getW() ) );
+    return result;
+}
+
+inline vec_float4 length( const Quat & quat ) // Returns sqrtf4 of norm( quat ).
+{
+    return sqrtf4( norm( quat ) );
+}
+
+inline const Quat normalize( const Quat & quat ) // Returns quat with every component multiplied by rsqrtf4( norm( quat ) ); a zero norm is not guarded against here.
+{
+    vec_float4 lenSqr, lenInv;
+    lenSqr = norm( quat );
+    lenInv = rsqrtf4( lenSqr ); // reciprocal square root of the squared length
+    return Quat(
+        spu_mul( quat.getX(), lenInv ),
+        spu_mul( quat.getY(), lenInv ),
+        spu_mul( quat.getZ(), lenInv ),
+        spu_mul( quat.getW(), lenInv )
+    );
+}
+
+inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 ) // Builds a quaternion from two vectors: xyz = cross(v0,v1) / sqrt(2*(1+dot(v0,v1))), w = sqrt(2*(1+dot(v0,v1))) / 2.
+{
+    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
+    cosHalfAngleX2 = sqrtf4( spu_mul( spu_splats(2.0f), spu_add( spu_splats(1.0f), dot( unitVec0, unitVec1 ) ) ) ); // sqrt( 2 * ( 1 + dot ) )
+    recipCosHalfAngleX2 = recipf4( cosHalfAngleX2 ); // NOTE(review): the argument is 0 when dot == -1; no guard is present in this function
+    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), spu_mul( cosHalfAngleX2, spu_splats(0.5f) ) );
+}
+
+inline const Quat Quat::rotation( vec_float4 radians, const Vector3 & unitVec ) // Builds a quaternion with xyz = unitVec * sin(radians/2) and w = cos(radians/2).
+{
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) ); // half of the input angle
+    sincosf4( angle, &s, &c ); // s and c are written through the pointers
+    return Quat( ( unitVec * s ), c );
+}
+
+inline const Quat Quat::rotationX( vec_float4 radians ) // Returns ( sin(radians/2), 0, 0, cos(radians/2) ).
+{
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) ); // half of the input angle
+    sincosf4( angle, &s, &c ); // s and c are written through the pointers
+    return Quat( s, spu_splats(0.0f), spu_splats(0.0f), c );
+}
+
+inline const Quat Quat::rotationY( vec_float4 radians ) // Returns ( 0, sin(radians/2), 0, cos(radians/2) ).
+{
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) ); // half of the input angle
+    sincosf4( angle, &s, &c ); // s and c are written through the pointers
+    return Quat( spu_splats(0.0f), s, spu_splats(0.0f), c );
+}
+
+inline const Quat Quat::rotationZ( vec_float4 radians ) // Returns ( 0, 0, sin(radians/2), cos(radians/2) ).
+{
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) ); // half of the input angle
+    sincosf4( angle, &s, &c ); // s and c are written through the pointers
+    return Quat( spu_splats(0.0f), spu_splats(0.0f), s, c );
+}
+
+inline const Quat Quat::operator *( const Quat & quat ) const // Quaternion product with *this on the left and quat on the right; the operand order matters.
+{
+    return Quat(
+        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mX ), spu_mul( mX, quat.mW ) ), spu_mul( mY, quat.mZ ) ), spu_mul( mZ, quat.mY ) ), // x = w*qx + x*qw + y*qz - z*qy
+        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mY ), spu_mul( mY, quat.mW ) ), spu_mul( mZ, quat.mX ) ), spu_mul( mX, quat.mZ ) ), // y = w*qy + y*qw + z*qx - x*qz
+        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mZ ), spu_mul( mZ, quat.mW ) ), spu_mul( mX, quat.mY ) ), spu_mul( mY, quat.mX ) ), // z = w*qz + z*qw + x*qy - y*qx
+        spu_sub( spu_sub( spu_sub( spu_mul( mW, quat.mW ), spu_mul( mX, quat.mX ) ), spu_mul( mY, quat.mY ) ), spu_mul( mZ, quat.mZ ) ) // w = w*qw - x*qx - y*qy - z*qz
+    );
+}
+
+inline Quat & Quat::operator *=( const Quat & quat ) // In-place quaternion product: *this becomes *this * quat (quat on the right).
+{
+    *this = *this * quat;
+    return *this;
+}
+
+inline const Vector3 rotate( const Quat & quat, const Vector3 & vec ) // Applies quat to vec in two stages and returns the resulting Vector3; quat is used as given (no normalization here).
+{
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = spu_sub( spu_add( spu_mul( quat.getW(), vec.getX() ), spu_mul( quat.getY(), vec.getZ() ) ), spu_mul( quat.getZ(), vec.getY() ) ); // tmp.xyz = w*vec + cross( quat.xyz, vec )
+    tmpY = spu_sub( spu_add( spu_mul( quat.getW(), vec.getY() ), spu_mul( quat.getZ(), vec.getX() ) ), spu_mul( quat.getX(), vec.getZ() ) );
+    tmpZ = spu_sub( spu_add( spu_mul( quat.getW(), vec.getZ() ), spu_mul( quat.getX(), vec.getY() ) ), spu_mul( quat.getY(), vec.getX() ) );
+    tmpW = spu_add( spu_add( spu_mul( quat.getX(), vec.getX() ), spu_mul( quat.getY(), vec.getY() ) ), spu_mul( quat.getZ(), vec.getZ() ) ); // tmpW = dot( quat.xyz, vec )
+    return Vector3(
+        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getX() ), spu_mul( tmpX, quat.getW() ) ), spu_mul( tmpY, quat.getZ() ) ), spu_mul( tmpZ, quat.getY() ) ), // result = tmpW*quat.xyz + w*tmp.xyz + cross( quat.xyz, tmp.xyz )
+        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getY() ), spu_mul( tmpY, quat.getW() ) ), spu_mul( tmpZ, quat.getX() ) ), spu_mul( tmpX, quat.getZ() ) ),
+        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getZ() ), spu_mul( tmpZ, quat.getW() ) ), spu_mul( tmpX, quat.getY() ) ), spu_mul( tmpY, quat.getX() ) )
+    );
+}
+
+inline const Quat conj( const Quat & quat ) // Returns quat with x, y and z passed through negatef4 and w copied unchanged.
+{
+    return Quat( negatef4( quat.getX() ), negatef4( quat.getY() ), negatef4( quat.getZ() ), quat.getW() );
+}
+
+inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 ) // Applies spu_sel with the same select1 mask to each component: bits set in select1 take quat1, clear bits take quat0.
+{
+    return Quat(
+        spu_sel( quat0.getX(), quat1.getX(), select1 ),
+        spu_sel( quat0.getY(), quat1.getY(), select1 ),
+        spu_sel( quat0.getZ(), quat1.getZ(), select1 ),
+        spu_sel( quat0.getW(), quat1.getW(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat & quat ) // Debug helper (only compiled under _VECTORMATH_DEBUG): prints each of the four slots as an AoS quaternion under a "slot N:" heading.
+{
+    Aos::Quat vec0, vec1, vec2, vec3;
+    quat.get4Aos( vec0, vec1, vec2, vec3 ); // split the SoA value into one AoS quaternion per slot
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+inline void print( const Quat & quat, const char * name ) // Debug helper (only compiled under _VECTORMATH_DEBUG): prints name, then each of the four slots under a "slot N:" heading.
+{
+    Aos::Quat vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    quat.get4Aos( vec0, vec1, vec2, vec3 ); // split the SoA value into one AoS quaternion per slot
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h
index 8f677abeb..c983f1817 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h
@@ -1,1167 +1,1167 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_CPP_H
-#define _VECTORMATH_VEC_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_add( spu_rlqwbyte( result, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
-    result = spu_mul( tmp0, tmp1 );
-    result = spu_nmsub( tmp2, tmp3, result );
-    return result;
-}
-
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
-{
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
-    const vec_uint4 mergeMant = spu_splats(0x000003ffu);
-    const vec_uint4 mergeSign = spu_splats(0x00008000u);
-
-    sign = spu_rlmask((vec_uint4)v, -16);
-    mant = spu_rlmask((vec_uint4)v, -13);
-    bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
-
-    notZero = spu_cmpgt(bexp, 112);
-    isInf = spu_cmpgt(bexp, 142);
-
-    bexp = spu_add(bexp, -112);
-    bexp = spu_sl(bexp, 10);
-
-    hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
-    hfloat = spu_sel(hfloat, hfloatInf, isInf);
-    hfloat = spu_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
-{
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
-}
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline VecIdx::operator float() const
-{
-    return spu_extract( ref, i );
-}
-
-inline float VecIdx::operator =( float scalar )
-{
-    ref = spu_insert( scalar, ref, i );
-    return scalar;
-}
-
-inline float VecIdx::operator =( const VecIdx& scalar )
-{
-    return *this = float(scalar);
-}
-
-inline float VecIdx::operator *=( float scalar )
-{
-    float tmp = spu_extract( ref, i ) * scalar;
-    ref = spu_insert( tmp, ref, i );
-    return tmp;
-}
-
-inline float VecIdx::operator /=( float scalar )
-{
-    float tmp = spu_extract( ref, i ) / scalar;
-    ref = spu_insert( tmp, ref, i );
-    return tmp;
-}
-
-inline float VecIdx::operator +=( float scalar )
-{
-    float tmp = spu_extract( ref, i ) + scalar;
-    ref = spu_insert( tmp, ref, i );
-    return tmp;
-}
-
-inline float VecIdx::operator -=( float scalar )
-{
-    float tmp = spu_extract( ref, i ) - scalar;
-    ref = spu_insert( tmp, ref, i );
-    return tmp;
-}
-
-inline Vector3::Vector3( float _x, float _y, float _z )
-{
-    mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
-}
-
-inline Vector3::Vector3( Point3 pnt )
-{
-    mVec128 = pnt.get128();
-}
-
-inline Vector3::Vector3( float scalar )
-{
-    mVec128 = spu_splats( scalar );
-}
-
-inline Vector3::Vector3( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    return Vector3( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
-}
-
-inline vec_float4 Vector3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( Vector3 vec, vec_float4 * quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
-    dstVec = spu_sel(vec.get128(), dstVec, mask);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
-    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
-    xyz3 = spu_rlqwbyte( zxyz, 4 );
-    vec0 = Vector3( xyzx );
-    vec1 = Vector3( xyz1 );
-    vec2 = Vector3( xyz2 );
-    vec3 = Vector3( xyz3 );
-}
-
-inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = spu_shuffle( vec0.get128(), vec1.get128(), _VECTORMATH_SHUF_XYZA );
-    yzxy = spu_shuffle( vec1.get128(), vec2.get128(), _VECTORMATH_SHUF_YZAB );
-    zxyz = spu_shuffle( vec2.get128(), vec3.get128(), _VECTORMATH_SHUF_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Vector3 & Vector3::operator =( Vector3 vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( float _x )
-{
-    mVec128 = spu_insert( _x, mVec128, 0 );
-    return *this;
-}
-
-inline float Vector3::getX( ) const
-{
-    return spu_extract( mVec128, 0 );
-}
-
-inline Vector3 & Vector3::setY( float _y )
-{
-    mVec128 = spu_insert( _y, mVec128, 1 );
-    return *this;
-}
-
-inline float Vector3::getY( ) const
-{
-    return spu_extract( mVec128, 1 );
-}
-
-inline Vector3 & Vector3::setZ( float _z )
-{
-    mVec128 = spu_insert( _z, mVec128, 2 );
-    return *this;
-}
-
-inline float Vector3::getZ( ) const
-{
-    return spu_extract( mVec128, 2 );
-}
-
-inline Vector3 & Vector3::setElem( int idx, float value )
-{
-    mVec128 = spu_insert( value, mVec128, idx );
-    return *this;
-}
-
-inline float Vector3::getElem( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline VecIdx Vector3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline float Vector3::operator []( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline const Vector3 Vector3::operator +( Vector3 vec ) const
-{
-    return Vector3( spu_add( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector3 Vector3::operator -( Vector3 vec ) const
-{
-    return Vector3( spu_sub( mVec128, vec.mVec128 ) );
-}
-
-inline const Point3 Vector3::operator +( Point3 pnt ) const
-{
-    return Point3( spu_add( mVec128, pnt.get128() ) );
-}
-
-inline const Vector3 Vector3::operator *( float scalar ) const
-{
-    return Vector3( spu_mul( mVec128, spu_splats(scalar) ) );
-}
-
-inline Vector3 & Vector3::operator +=( Vector3 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( Vector3 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( float scalar ) const
-{
-    return Vector3( divf4( mVec128, spu_splats(scalar) ) );
-}
-
-inline Vector3 & Vector3::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3( negatef4( mVec128 ) );
-}
-
-inline const Vector3 operator *( float scalar, Vector3 vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( spu_mul( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( divf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 recipPerElem( Vector3 vec )
-{
-    return Vector3( recipf4( vec.get128() ) );
-}
-
-inline const Vector3 sqrtPerElem( Vector3 vec )
-{
-    return Vector3( sqrtf4( vec.get128() ) );
-}
-
-inline const Vector3 rsqrtPerElem( Vector3 vec )
-{
-    return Vector3( rsqrtf4( vec.get128() ) );
-}
-
-inline const Vector3 absPerElem( Vector3 vec )
-{
-    return Vector3( fabsf4( vec.get128() ) );
-}
-
-inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline float maxElem( Vector3 vec )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline float minElem( Vector3 vec )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline float sum( Vector3 vec )
-{
-    return
-        spu_extract( vec.get128(), 0 ) +
-        spu_extract( vec.get128(), 1 ) +
-        spu_extract( vec.get128(), 2 );
-}
-
-inline float dot( Vector3 vec0, Vector3 vec1 )
-{
-    return spu_extract( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline float lengthSqr( Vector3 vec )
-{
-    return spu_extract( _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
-}
-
-inline float length( Vector3 vec )
-{
-    return sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( Vector3 vec )
-{
-    vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
-    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
-    return Vector3( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
-}
-
-inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
-{
-    return Vector3( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Vector3 vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( Vector3 vec, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-inline Vector4::Vector4( float _x, float _y, float _z, float _w )
-{
-    mVec128 = (vec_float4){ _x, _y, _z, _w };
-}
-
-inline Vector4::Vector4( Vector3 xyz, float _w )
-{
-    mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
-}
-
-inline Vector4::Vector4( Vector3 vec )
-{
-    mVec128 = spu_sel( vec.get128(), spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-inline Vector4::Vector4( Point3 pnt )
-{
-    mVec128 = spu_sel( pnt.get128(), spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-inline Vector4::Vector4( Quat quat )
-{
-    mVec128 = quat.get128();
-}
-
-inline Vector4::Vector4( float scalar )
-{
-    mVec128 = spu_splats( scalar );
-}
-
-inline Vector4::Vector4( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    return Vector4( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
-}
-
-inline vec_float4 Vector4::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
-}
-
-inline Vector4 & Vector4::operator =( Vector4 vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( Vector3 vec )
-{
-    mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) );
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Vector4 & Vector4::setX( float _x )
-{
-    mVec128 = spu_insert( _x, mVec128, 0 );
-    return *this;
-}
-
-inline float Vector4::getX( ) const
-{
-    return spu_extract( mVec128, 0 );
-}
-
-inline Vector4 & Vector4::setY( float _y )
-{
-    mVec128 = spu_insert( _y, mVec128, 1 );
-    return *this;
-}
-
-inline float Vector4::getY( ) const
-{
-    return spu_extract( mVec128, 1 );
-}
-
-inline Vector4 & Vector4::setZ( float _z )
-{
-    mVec128 = spu_insert( _z, mVec128, 2 );
-    return *this;
-}
-
-inline float Vector4::getZ( ) const
-{
-    return spu_extract( mVec128, 2 );
-}
-
-inline Vector4 & Vector4::setW( float _w )
-{
-    mVec128 = spu_insert( _w, mVec128, 3 );
-    return *this;
-}
-
-inline float Vector4::getW( ) const
-{
-    return spu_extract( mVec128, 3 );
-}
-
-inline Vector4 & Vector4::setElem( int idx, float value )
-{
-    mVec128 = spu_insert( value, mVec128, idx );
-    return *this;
-}
-
-inline float Vector4::getElem( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline VecIdx Vector4::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline float Vector4::operator []( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline const Vector4 Vector4::operator +( Vector4 vec ) const
-{
-    return Vector4( spu_add( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator -( Vector4 vec ) const
-{
-    return Vector4( spu_sub( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator *( float scalar ) const
-{
-    return Vector4( spu_mul( mVec128, spu_splats(scalar) ) );
-}
-
-inline Vector4 & Vector4::operator +=( Vector4 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( Vector4 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( float scalar ) const
-{
-    return Vector4( divf4( mVec128, spu_splats(scalar) ) );
-}
-
-inline Vector4 & Vector4::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4( negatef4( mVec128 ) );
-}
-
-inline const Vector4 operator *( float scalar, Vector4 vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( spu_mul( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( divf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 recipPerElem( Vector4 vec )
-{
-    return Vector4( recipf4( vec.get128() ) );
-}
-
-inline const Vector4 sqrtPerElem( Vector4 vec )
-{
-    return Vector4( sqrtf4( vec.get128() ) );
-}
-
-inline const Vector4 rsqrtPerElem( Vector4 vec )
-{
-    return Vector4( rsqrtf4( vec.get128() ) );
-}
-
-inline const Vector4 absPerElem( Vector4 vec )
-{
-    return Vector4( fabsf4( vec.get128() ) );
-}
-
-inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline float maxElem( Vector4 vec )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline float minElem( Vector4 vec )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline float sum( Vector4 vec )
-{
-    return
-        spu_extract( vec.get128(), 0 ) +
-        spu_extract( vec.get128(), 1 ) +
-        spu_extract( vec.get128(), 2 ) +
-        spu_extract( vec.get128(), 3 );
-}
-
-inline float dot( Vector4 vec0, Vector4 vec1 )
-{
-    return spu_extract( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline float lengthSqr( Vector4 vec )
-{
-    return spu_extract( _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
-}
-
-inline float length( Vector4 vec )
-{
-    return sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( Vector4 vec )
-{
-    vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() );
-    return Vector4( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
-}
-
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 )
-{
-    return Vector4( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Vector4 vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( Vector4 vec, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-inline Point3::Point3( float _x, float _y, float _z )
-{
-    mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
-}
-
-inline Point3::Point3( Vector3 vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Point3::Point3( float scalar )
-{
-    mVec128 = spu_splats( scalar );
-}
-
-inline Point3::Point3( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline vec_float4 Point3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( Point3 pnt, vec_float4 * quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
-    dstVec = spu_sel(pnt.get128(), dstVec, mask);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
-    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
-    xyz3 = spu_rlqwbyte( zxyz, 4 );
-    pnt0 = Point3( xyzx );
-    pnt1 = Point3( xyz1 );
-    pnt2 = Point3( xyz2 );
-    pnt3 = Point3( xyz3 );
-}
-
-inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = spu_shuffle( pnt0.get128(), pnt1.get128(), _VECTORMATH_SHUF_XYZA );
-    yzxy = spu_shuffle( pnt1.get128(), pnt2.get128(), _VECTORMATH_SHUF_YZAB );
-    zxyz = spu_shuffle( pnt2.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Point3 & Point3::operator =( Point3 pnt )
-{
-    mVec128 = pnt.mVec128;
-    return *this;
-}
-
-inline Point3 & Point3::setX( float _x )
-{
-    mVec128 = spu_insert( _x, mVec128, 0 );
-    return *this;
-}
-
-inline float Point3::getX( ) const
-{
-    return spu_extract( mVec128, 0 );
-}
-
-inline Point3 & Point3::setY( float _y )
-{
-    mVec128 = spu_insert( _y, mVec128, 1 );
-    return *this;
-}
-
-inline float Point3::getY( ) const
-{
-    return spu_extract( mVec128, 1 );
-}
-
-inline Point3 & Point3::setZ( float _z )
-{
-    mVec128 = spu_insert( _z, mVec128, 2 );
-    return *this;
-}
-
-inline float Point3::getZ( ) const
-{
-    return spu_extract( mVec128, 2 );
-}
-
-inline Point3 & Point3::setElem( int idx, float value )
-{
-    mVec128 = spu_insert( value, mVec128, idx );
-    return *this;
-}
-
-inline float Point3::getElem( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline VecIdx Point3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline float Point3::operator []( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline const Vector3 Point3::operator -( Point3 pnt ) const
-{
-    return Vector3( spu_sub( mVec128, pnt.mVec128 ) );
-}
-
-inline const Point3 Point3::operator +( Vector3 vec ) const
-{
-    return Point3( spu_add( mVec128, vec.get128() ) );
-}
-
-inline const Point3 Point3::operator -( Vector3 vec ) const
-{
-    return Point3( spu_sub( mVec128, vec.get128() ) );
-}
-
-inline Point3 & Point3::operator +=( Vector3 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( Vector3 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( spu_mul( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 recipPerElem( Point3 pnt )
-{
-    return Point3( recipf4( pnt.get128() ) );
-}
-
-inline const Point3 sqrtPerElem( Point3 pnt )
-{
-    return Point3( sqrtf4( pnt.get128() ) );
-}
-
-inline const Point3 rsqrtPerElem( Point3 pnt )
-{
-    return Point3( rsqrtf4( pnt.get128() ) );
-}
-
-inline const Point3 absPerElem( Point3 pnt )
-{
-    return Point3( fabsf4( pnt.get128() ) );
-}
-
-inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline float maxElem( Point3 pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
-    result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline float minElem( Point3 pnt )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
-    result = fminf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline float sum( Point3 pnt )
-{
-    return
-        spu_extract( pnt.get128(), 0 ) +
-        spu_extract( pnt.get128(), 1 ) +
-        spu_extract( pnt.get128(), 2 );
-}
-
-inline const Point3 scale( Point3 pnt, float scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline float projection( Point3 pnt, Vector3 unitVec )
-{
-    return spu_extract( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
-}
-
-inline float distSqrFromOrigin( Point3 pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline float distFromOrigin( Point3 pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline float distSqr( Point3 pnt0, Point3 pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline float dist( Point3 pnt0, Point3 pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 )
-{
-    return Point3( spu_sel( pnt0.get128(), pnt1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Point3 pnt )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( Point3 pnt, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
+#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
+#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 );
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
+    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
+}
+
+static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 );
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
+    return spu_add( spu_rlqwbyte( result, 8 ), result );
+}
+
+static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
+    tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
+    tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
+    tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
+    tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
+    result = spu_mul( tmp0, tmp1 );
+    result = spu_nmsub( tmp2, tmp3, result );
+    return result;
+}
+
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
+{
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
+    const vec_uint4 mergeMant = spu_splats(0x000003ffu);
+    const vec_uint4 mergeSign = spu_splats(0x00008000u);
+
+    sign = spu_rlmask((vec_uint4)v, -16);
+    mant = spu_rlmask((vec_uint4)v, -13);
+    bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
+
+    notZero = spu_cmpgt(bexp, 112);
+    isInf = spu_cmpgt(bexp, 142);
+
+    bexp = spu_add(bexp, -112);
+    bexp = spu_sl(bexp, 10);
+
+    hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
+    hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
+    hfloat = spu_sel(hfloat, hfloatInf, isInf);
+    hfloat = spu_sel(hfloat, sign, mergeSign);
+
+    return hfloat;
+}
+
+static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
+{
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
+}
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline VecIdx::operator float() const
+{
+    return spu_extract( ref, i );
+}
+
+inline float VecIdx::operator =( float scalar )
+{
+    ref = spu_insert( scalar, ref, i );
+    return scalar;
+}
+
+inline float VecIdx::operator =( const VecIdx& scalar )
+{
+    return *this = float(scalar);
+}
+
+inline float VecIdx::operator *=( float scalar )
+{
+    float tmp = spu_extract( ref, i ) * scalar;
+    ref = spu_insert( tmp, ref, i );
+    return tmp;
+}
+
+inline float VecIdx::operator /=( float scalar )
+{
+    float tmp = spu_extract( ref, i ) / scalar;
+    ref = spu_insert( tmp, ref, i );
+    return tmp;
+}
+
+inline float VecIdx::operator +=( float scalar )
+{
+    float tmp = spu_extract( ref, i ) + scalar;
+    ref = spu_insert( tmp, ref, i );
+    return tmp;
+}
+
+inline float VecIdx::operator -=( float scalar )
+{
+    float tmp = spu_extract( ref, i ) - scalar;
+    ref = spu_insert( tmp, ref, i );
+    return tmp;
+}
+
+inline Vector3::Vector3( float _x, float _y, float _z )
+{
+    mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
+}
+
+inline Vector3::Vector3( Point3 pnt )
+{
+    mVec128 = pnt.get128();
+}
+
+inline Vector3::Vector3( float scalar )
+{
+    mVec128 = spu_splats( scalar );
+}
+
+inline Vector3::Vector3( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Vector3 Vector3::xAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_1000 );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_0100 );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_0010 );
+}
+
+inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
+    angles = spu_mul( angles, angle );
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
+    return Vector3( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
+}
+
+inline vec_float4 Vector3::get128( ) const
+{
+    return mVec128;
+}
+
+inline void storeXYZ( Vector3 vec, vec_float4 * quad )
+{
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
+    dstVec = spu_sel(vec.get128(), dstVec, mask);
+    *quad = dstVec;
+}
+
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
+    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
+    xyz3 = spu_rlqwbyte( zxyz, 4 );
+    vec0 = Vector3( xyzx );
+    vec1 = Vector3( xyz1 );
+    vec2 = Vector3( xyz2 );
+    vec3 = Vector3( xyz3 );
+}
+
+inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = spu_shuffle( vec0.get128(), vec1.get128(), _VECTORMATH_SHUF_XYZA );
+    yzxy = spu_shuffle( vec1.get128(), vec2.get128(), _VECTORMATH_SHUF_YZAB );
+    zxyz = spu_shuffle( vec2.get128(), vec3.get128(), _VECTORMATH_SHUF_ZABC );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
+    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Vector3 & Vector3::operator =( Vector3 vec )
+{
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( float _x )
+{
+    mVec128 = spu_insert( _x, mVec128, 0 );
+    return *this;
+}
+
+inline float Vector3::getX( ) const
+{
+    return spu_extract( mVec128, 0 );
+}
+
+inline Vector3 & Vector3::setY( float _y )
+{
+    mVec128 = spu_insert( _y, mVec128, 1 );
+    return *this;
+}
+
+inline float Vector3::getY( ) const
+{
+    return spu_extract( mVec128, 1 );
+}
+
+inline Vector3 & Vector3::setZ( float _z )
+{
+    mVec128 = spu_insert( _z, mVec128, 2 );
+    return *this;
+}
+
+inline float Vector3::getZ( ) const
+{
+    return spu_extract( mVec128, 2 );
+}
+
+inline Vector3 & Vector3::setElem( int idx, float value )
+{
+    mVec128 = spu_insert( value, mVec128, idx );
+    return *this;
+}
+
+inline float Vector3::getElem( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline VecIdx Vector3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline float Vector3::operator []( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline const Vector3 Vector3::operator +( Vector3 vec ) const
+{
+    return Vector3( spu_add( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector3 Vector3::operator -( Vector3 vec ) const
+{
+    return Vector3( spu_sub( mVec128, vec.mVec128 ) );
+}
+
+inline const Point3 Vector3::operator +( Point3 pnt ) const
+{
+    return Point3( spu_add( mVec128, pnt.get128() ) );
+}
+
+inline const Vector3 Vector3::operator *( float scalar ) const
+{
+    return Vector3( spu_mul( mVec128, spu_splats(scalar) ) );
+}
+
+inline Vector3 & Vector3::operator +=( Vector3 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator -=( Vector3 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator /( float scalar ) const
+{
+    return Vector3( divf4( mVec128, spu_splats(scalar) ) );
+}
+
+inline Vector3 & Vector3::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator -( ) const
+{
+    return Vector3( negatef4( mVec128 ) );
+}
+
+inline const Vector3 operator *( float scalar, Vector3 vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( spu_mul( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( divf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 recipPerElem( Vector3 vec )
+{
+    return Vector3( recipf4( vec.get128() ) );
+}
+
+inline const Vector3 sqrtPerElem( Vector3 vec )
+{
+    return Vector3( sqrtf4( vec.get128() ) );
+}
+
+inline const Vector3 rsqrtPerElem( Vector3 vec )
+{
+    return Vector3( rsqrtf4( vec.get128() ) );
+}
+
+inline const Vector3 absPerElem( Vector3 vec )
+{
+    return Vector3( fabsf4( vec.get128() ) );
+}
+
+inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline float maxElem( Vector3 vec )
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline float minElem( Vector3 vec )
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+inline float sum( Vector3 vec )
+{
+    return
+        spu_extract( vec.get128(), 0 ) +
+        spu_extract( vec.get128(), 1 ) +
+        spu_extract( vec.get128(), 2 );
+}
+
+inline float dot( Vector3 vec0, Vector3 vec1 )
+{
+    return spu_extract( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
+}
+
+inline float lengthSqr( Vector3 vec )
+{
+    return spu_extract( _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
+}
+
+inline float length( Vector3 vec )
+{
+    return sqrtf( lengthSqr( vec ) );
+}
+
+inline const Vector3 normalize( Vector3 vec )
+{
+    vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
+    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
+    return Vector3( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
+}
+
+inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
+{
+    return Vector3( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Vector3 vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+inline void print( Vector3 vec, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+inline Vector4::Vector4( float _x, float _y, float _z, float _w )
+{
+    mVec128 = (vec_float4){ _x, _y, _z, _w };
+}
+
+inline Vector4::Vector4( Vector3 xyz, float _w )
+{
+    mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
+}
+
+inline Vector4::Vector4( Vector3 vec )
+{
+    mVec128 = spu_sel( vec.get128(), spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
+}
+
+inline Vector4::Vector4( Point3 pnt )
+{
+    mVec128 = spu_sel( pnt.get128(), spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
+}
+
+inline Vector4::Vector4( Quat quat )
+{
+    mVec128 = quat.get128();
+}
+
+inline Vector4::Vector4( float scalar )
+{
+    mVec128 = spu_splats( scalar );
+}
+
+inline Vector4::Vector4( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Vector4 Vector4::xAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_1000 );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0100 );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0010 );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0001 );
+}
+
+inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 )
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
+    angles = spu_mul( angles, angle );
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
+    return Vector4( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
+}
+
+inline vec_float4 Vector4::get128( ) const
+{
+    return mVec128;
+}
+
+inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads )
+{
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
+}
+
+inline Vector4 & Vector4::operator =( Vector4 vec )
+{
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( Vector3 vec )
+{
+    mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) );
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{
+    return Vector3( mVec128 );
+}
+
+inline Vector4 & Vector4::setX( float _x )
+{
+    mVec128 = spu_insert( _x, mVec128, 0 );
+    return *this;
+}
+
+inline float Vector4::getX( ) const
+{
+    return spu_extract( mVec128, 0 );
+}
+
+inline Vector4 & Vector4::setY( float _y )
+{
+    mVec128 = spu_insert( _y, mVec128, 1 );
+    return *this;
+}
+
+inline float Vector4::getY( ) const
+{
+    return spu_extract( mVec128, 1 );
+}
+
+inline Vector4 & Vector4::setZ( float _z )
+{
+    mVec128 = spu_insert( _z, mVec128, 2 );
+    return *this;
+}
+
+inline float Vector4::getZ( ) const
+{
+    return spu_extract( mVec128, 2 );
+}
+
+inline Vector4 & Vector4::setW( float _w )
+{
+    mVec128 = spu_insert( _w, mVec128, 3 );
+    return *this;
+}
+
+inline float Vector4::getW( ) const
+{
+    return spu_extract( mVec128, 3 );
+}
+
+inline Vector4 & Vector4::setElem( int idx, float value )
+{
+    mVec128 = spu_insert( value, mVec128, idx );
+    return *this;
+}
+
+inline float Vector4::getElem( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline VecIdx Vector4::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline float Vector4::operator []( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline const Vector4 Vector4::operator +( Vector4 vec ) const
+{
+    return Vector4( spu_add( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator -( Vector4 vec ) const
+{
+    return Vector4( spu_sub( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator *( float scalar ) const
+{
+    return Vector4( spu_mul( mVec128, spu_splats(scalar) ) );
+}
+
+inline Vector4 & Vector4::operator +=( Vector4 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator -=( Vector4 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator /( float scalar ) const
+{
+    return Vector4( divf4( mVec128, spu_splats(scalar) ) );
+}
+
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator -( ) const
+{
+    return Vector4( negatef4( mVec128 ) );
+}
+
+inline const Vector4 operator *( float scalar, Vector4 vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( spu_mul( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( divf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 recipPerElem( Vector4 vec )
+{
+    return Vector4( recipf4( vec.get128() ) );
+}
+
+inline const Vector4 sqrtPerElem( Vector4 vec )
+{
+    return Vector4( sqrtf4( vec.get128() ) );
+}
+
+inline const Vector4 rsqrtPerElem( Vector4 vec )
+{
+    return Vector4( rsqrtf4( vec.get128() ) );
+}
+
+inline const Vector4 absPerElem( Vector4 vec )
+{
+    return Vector4( fabsf4( vec.get128() ) );
+}
+
+inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline float maxElem( Vector4 vec )
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline float minElem( Vector4 vec )
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+inline float sum( Vector4 vec )
+{
+    return
+        spu_extract( vec.get128(), 0 ) +
+        spu_extract( vec.get128(), 1 ) +
+        spu_extract( vec.get128(), 2 ) +
+        spu_extract( vec.get128(), 3 );
+}
+
+inline float dot( Vector4 vec0, Vector4 vec1 )
+{
+    return spu_extract( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
+}
+
+inline float lengthSqr( Vector4 vec )
+{
+    return spu_extract( _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
+}
+
+inline float length( Vector4 vec )
+{
+    return sqrtf( lengthSqr( vec ) );
+}
+
+inline const Vector4 normalize( Vector4 vec )
+{
+    vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() );
+    return Vector4( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
+}
+
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 )
+{
+    return Vector4( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Vector4 vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+inline void print( Vector4 vec, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+inline Point3::Point3( float _x, float _y, float _z )
+{
+    mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
+}
+
+inline Point3::Point3( Vector3 vec )
+{
+    mVec128 = vec.get128();
+}
+
+inline Point3::Point3( float scalar )
+{
+    mVec128 = spu_splats( scalar );
+}
+
+inline Point3::Point3( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
+{
+    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
+}
+
+inline vec_float4 Point3::get128( ) const
+{
+    return mVec128;
+}
+
+inline void storeXYZ( Point3 pnt, vec_float4 * quad )
+{
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
+    dstVec = spu_sel(pnt.get128(), dstVec, mask);
+    *quad = dstVec;
+}
+
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
+    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
+    xyz3 = spu_rlqwbyte( zxyz, 4 );
+    pnt0 = Point3( xyzx );
+    pnt1 = Point3( xyz1 );
+    pnt2 = Point3( xyz2 );
+    pnt3 = Point3( xyz3 );
+}
+
+inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = spu_shuffle( pnt0.get128(), pnt1.get128(), _VECTORMATH_SHUF_XYZA );
+    yzxy = spu_shuffle( pnt1.get128(), pnt2.get128(), _VECTORMATH_SHUF_YZAB );
+    zxyz = spu_shuffle( pnt2.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZABC );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
+    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Point3 & Point3::operator =( Point3 pnt )
+{
+    mVec128 = pnt.mVec128;
+    return *this;
+}
+
+inline Point3 & Point3::setX( float _x )
+{
+    mVec128 = spu_insert( _x, mVec128, 0 );
+    return *this;
+}
+
+inline float Point3::getX( ) const
+{
+    return spu_extract( mVec128, 0 );
+}
+
+inline Point3 & Point3::setY( float _y )
+{
+    mVec128 = spu_insert( _y, mVec128, 1 );
+    return *this;
+}
+
+inline float Point3::getY( ) const
+{
+    return spu_extract( mVec128, 1 );
+}
+
+inline Point3 & Point3::setZ( float _z )
+{
+    mVec128 = spu_insert( _z, mVec128, 2 );
+    return *this;
+}
+
+inline float Point3::getZ( ) const
+{
+    return spu_extract( mVec128, 2 );
+}
+
+inline Point3 & Point3::setElem( int idx, float value )
+{
+    mVec128 = spu_insert( value, mVec128, idx );
+    return *this;
+}
+
+inline float Point3::getElem( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline VecIdx Point3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline float Point3::operator []( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline const Vector3 Point3::operator -( Point3 pnt ) const
+{
+    return Vector3( spu_sub( mVec128, pnt.mVec128 ) );
+}
+
+inline const Point3 Point3::operator +( Vector3 vec ) const
+{
+    return Point3( spu_add( mVec128, vec.get128() ) );
+}
+
+inline const Point3 Point3::operator -( Vector3 vec ) const
+{
+    return Point3( spu_sub( mVec128, vec.get128() ) );
+}
+
+inline Point3 & Point3::operator +=( Vector3 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Point3 & Point3::operator -=( Vector3 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( spu_mul( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 recipPerElem( Point3 pnt )
+{
+    return Point3( recipf4( pnt.get128() ) );
+}
+
+inline const Point3 sqrtPerElem( Point3 pnt )
+{
+    return Point3( sqrtf4( pnt.get128() ) );
+}
+
+inline const Point3 rsqrtPerElem( Point3 pnt )
+{
+    return Point3( rsqrtf4( pnt.get128() ) );
+}
+
+inline const Point3 absPerElem( Point3 pnt )
+{
+    return Point3( fabsf4( pnt.get128() ) );
+}
+
+inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline float maxElem( Point3 pnt )
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
+    result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline float minElem( Point3 pnt )
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
+    result = fminf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+inline float sum( Point3 pnt )
+{
+    return
+        spu_extract( pnt.get128(), 0 ) +
+        spu_extract( pnt.get128(), 1 ) +
+        spu_extract( pnt.get128(), 2 );
+}
+
+inline const Point3 scale( Point3 pnt, float scaleVal )
+{
+    return mulPerElem( pnt, Point3( scaleVal ) );
+}
+
+inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
+{
+    return mulPerElem( pnt, Point3( scaleVec ) );
+}
+
+inline float projection( Point3 pnt, Vector3 unitVec )
+{
+    return spu_extract( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
+}
+
+inline float distSqrFromOrigin( Point3 pnt )
+{
+    return lengthSqr( Vector3( pnt ) );
+}
+
+inline float distFromOrigin( Point3 pnt )
+{
+    return length( Vector3( pnt ) );
+}
+
+inline float distSqr( Point3 pnt0, Point3 pnt1 )
+{
+    return lengthSqr( ( pnt1 - pnt0 ) );
+}
+
+inline float dist( Point3 pnt0, Point3 pnt1 )
+{
+    return length( ( pnt1 - pnt0 ) );
+}
+
+inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 )
+{
+    return Point3( spu_sel( pnt0.get128(), pnt1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Point3 pnt )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = pnt.get128();
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+inline void print( Point3 pnt, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = pnt.get128();
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_soa.h
index 1e4e04b2d..1ac657732 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_soa.h
@@ -1,1439 +1,1439 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_CPP_H
-#define _VECTORMATH_VEC_SOA_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Soa {
-
-inline Vector3::Vector3( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-}
-
-inline Vector3::Vector3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Vector3::Vector3( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-}
-
-inline Vector3::Vector3( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline Vector3::Vector3( Aos::Vector3 vec )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_float4 vec128 = vec.get128();
-    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
-}
-
-inline Vector3::Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_ZCWD );
-    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void Vector3::get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
-    result0 = Aos::Vector3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_XAYB ) );
-    result1 = Aos::Vector3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_ZBW0 ) );
-    result2 = Aos::Vector3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_XCY0 ) );
-    result3 = Aos::Vector3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_ZDW0 ) );
-}
-
-inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
-    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
-    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
-    vec.setX( spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
-    vec.setY( spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
-    vec.setZ( spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
-}
-
-inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = spu_shuffle( vec.getX(), vec.getY(), _VECTORMATH_SHUF_XAZC );
-    zxzx = spu_shuffle( vec.getZ(), vec.getX(), _VECTORMATH_SHUF_ZDXB );
-    yzyz = spu_shuffle( vec.getY(), vec.getZ(), _VECTORMATH_SHUF_YBWD );
-    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
-    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
-    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( vec0, xyz0 );
-    storeXYZArray( vec1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Vector3 & Vector3::operator =( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Vector3::getX( ) const
-{
-    return mX;
-}
-
-inline Vector3 & Vector3::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Vector3::getY( ) const
-{
-    return mY;
-}
-
-inline Vector3 & Vector3::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Vector3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector3 & Vector3::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Vector3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Vector3::vec_float4_t & Vector3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Vector3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
-{
-    return Vector3(
-        spu_add( mX, vec.mX ),
-        spu_add( mY, vec.mY ),
-        spu_add( mZ, vec.mZ )
-    );
-}
-
-inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
-{
-    return Vector3(
-        spu_sub( mX, vec.mX ),
-        spu_sub( mY, vec.mY ),
-        spu_sub( mZ, vec.mZ )
-    );
-}
-
-inline const Point3 Vector3::operator +( const Point3 & pnt ) const
-{
-    return Point3(
-        spu_add( mX, pnt.getX() ),
-        spu_add( mY, pnt.getY() ),
-        spu_add( mZ, pnt.getZ() )
-    );
-}
-
-inline const Vector3 Vector3::operator *( vec_float4 scalar ) const
-{
-    return Vector3(
-        spu_mul( mX, scalar ),
-        spu_mul( mY, scalar ),
-        spu_mul( mZ, scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( vec_float4 scalar ) const
-{
-    return Vector3(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ )
-    );
-}
-
-inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        spu_mul( vec0.getX(), vec1.getX() ),
-        spu_mul( vec0.getY(), vec1.getY() ),
-        spu_mul( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        divf4( vec0.getX(), vec1.getX() ),
-        divf4( vec0.getY(), vec1.getY() ),
-        divf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 recipPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        recipf4( vec.getX() ),
-        recipf4( vec.getY() ),
-        recipf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 sqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        sqrtf4( vec.getX() ),
-        sqrtf4( vec.getY() ),
-        sqrtf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 rsqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        rsqrtf4( vec.getX() ),
-        rsqrtf4( vec.getY() ),
-        rsqrtf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 absPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        fabsf4( vec.getX() ),
-        fabsf4( vec.getY() ),
-        fabsf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        copysignf4( vec0.getX(), vec1.getX() ),
-        copysignf4( vec0.getY(), vec1.getY() ),
-        copysignf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        fmaxf4( vec0.getX(), vec1.getX() ),
-        fmaxf4( vec0.getY(), vec1.getY() ),
-        fmaxf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline vec_float4 maxElem( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec.getX(), vec.getY() );
-    result = fmaxf4( vec.getZ(), result );
-    return result;
-}
-
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        fminf4( vec0.getX(), vec1.getX() ),
-        fminf4( vec0.getY(), vec1.getY() ),
-        fminf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline vec_float4 minElem( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = fminf4( vec.getX(), vec.getY() );
-    result = fminf4( vec.getZ(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = spu_add( vec.getX(), vec.getY() );
-    result = spu_add( result, vec.getZ() );
-    return result;
-}
-
-inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0.getX(), vec1.getX() );
-    result = spu_add( result, spu_mul( vec0.getY(), vec1.getY() ) );
-    result = spu_add( result, spu_mul( vec0.getZ(), vec1.getZ() ) );
-    return result;
-}
-
-inline vec_float4 lengthSqr( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = spu_mul( vec.getX(), vec.getX() );
-    result = spu_add( result, spu_mul( vec.getY(), vec.getY() ) );
-    result = spu_add( result, spu_mul( vec.getZ(), vec.getZ() ) );
-    return result;
-}
-
-inline vec_float4 length( const Vector3 & vec )
-{
-    return sqrtf4( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( const Vector3 & vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = rsqrtf4( lenSqr );
-    return Vector3(
-        spu_mul( vec.getX(), lenInv ),
-        spu_mul( vec.getY(), lenInv ),
-        spu_mul( vec.getZ(), lenInv )
-    );
-}
-
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        spu_sub( spu_mul( vec0.getY(), vec1.getZ() ), spu_mul( vec0.getZ(), vec1.getY() ) ),
-        spu_sub( spu_mul( vec0.getZ(), vec1.getX() ), spu_mul( vec0.getX(), vec1.getZ() ) ),
-        spu_sub( spu_mul( vec0.getX(), vec1.getY() ), spu_mul( vec0.getY(), vec1.getX() ) )
-    );
-}
-
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 )
-{
-    return Vector3(
-        spu_sel( vec0.getX(), vec1.getX(), select1 ),
-        spu_sel( vec0.getY(), vec1.getY(), select1 ),
-        spu_sel( vec0.getZ(), vec1.getZ(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector3 & vec )
-{
-    Aos::Vector3 vec0, vec1, vec2, vec3;
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Vector3 & vec, const char * name )
-{
-    Aos::Vector3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-inline Vector4::Vector4( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-}
-
-inline Vector4::Vector4( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Vector4::Vector4( const Vector3 & xyz, vec_float4 _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Vector4::Vector4( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = spu_splats(0.0f);
-}
-
-inline Vector4::Vector4( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-    mW = spu_splats(1.0f);
-}
-
-inline Vector4::Vector4( const Quat & quat )
-{
-    mX = quat.getX();
-    mY = quat.getY();
-    mZ = quat.getZ();
-    mW = quat.getW();
-}
-
-inline Vector4::Vector4( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline Vector4::Vector4( Aos::Vector4 vec )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_float4 vec128 = vec.get128();
-    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
-    mW = spu_shuffle( vec128, vec128, shuffle_wwww );
-}
-
-inline Vector4::Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_ZCWD );
-    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    mW = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
-{
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void Vector4::get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_ZCWD );
-    result0 = Aos::Vector4( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
-    result1 = Aos::Vector4( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
-    result2 = Aos::Vector4( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
-    result3 = Aos::Vector4( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
-}
-
-inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads )
-{
-    Aos::Vector4 v0, v1, v2, v3;
-    vec.get4Aos( v0, v1, v2, v3 );
-    twoQuads[0] = _vmath2VfToHalfFloats(v0.get128(), v1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(v2.get128(), v3.get128());
-}
-
-inline Vector4 & Vector4::operator =( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Vector4 & Vector4::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Vector4::getX( ) const
-{
-    return mX;
-}
-
-inline Vector4 & Vector4::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Vector4::getY( ) const
-{
-    return mY;
-}
-
-inline Vector4 & Vector4::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Vector4::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector4 & Vector4::setW( vec_float4 _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline vec_float4 Vector4::getW( ) const
-{
-    return mW;
-}
-
-inline Vector4 & Vector4::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Vector4::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Vector4::vec_float4_t & Vector4::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Vector4::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
-{
-    return Vector4(
-        spu_add( mX, vec.mX ),
-        spu_add( mY, vec.mY ),
-        spu_add( mZ, vec.mZ ),
-        spu_add( mW, vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
-{
-    return Vector4(
-        spu_sub( mX, vec.mX ),
-        spu_sub( mY, vec.mY ),
-        spu_sub( mZ, vec.mZ ),
-        spu_sub( mW, vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator *( vec_float4 scalar ) const
-{
-    return Vector4(
-        spu_mul( mX, scalar ),
-        spu_mul( mY, scalar ),
-        spu_mul( mZ, scalar ),
-        spu_mul( mW, scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator +=( const Vector4 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( const Vector4 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( vec_float4 scalar ) const
-{
-    return Vector4(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar ),
-        divf4( mW, scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ ),
-        negatef4( mW )
-    );
-}
-
-inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        spu_mul( vec0.getX(), vec1.getX() ),
-        spu_mul( vec0.getY(), vec1.getY() ),
-        spu_mul( vec0.getZ(), vec1.getZ() ),
-        spu_mul( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        divf4( vec0.getX(), vec1.getX() ),
-        divf4( vec0.getY(), vec1.getY() ),
-        divf4( vec0.getZ(), vec1.getZ() ),
-        divf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 recipPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        recipf4( vec.getX() ),
-        recipf4( vec.getY() ),
-        recipf4( vec.getZ() ),
-        recipf4( vec.getW() )
-    );
-}
-
-inline const Vector4 sqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        sqrtf4( vec.getX() ),
-        sqrtf4( vec.getY() ),
-        sqrtf4( vec.getZ() ),
-        sqrtf4( vec.getW() )
-    );
-}
-
-inline const Vector4 rsqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        rsqrtf4( vec.getX() ),
-        rsqrtf4( vec.getY() ),
-        rsqrtf4( vec.getZ() ),
-        rsqrtf4( vec.getW() )
-    );
-}
-
-inline const Vector4 absPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        fabsf4( vec.getX() ),
-        fabsf4( vec.getY() ),
-        fabsf4( vec.getZ() ),
-        fabsf4( vec.getW() )
-    );
-}
-
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        copysignf4( vec0.getX(), vec1.getX() ),
-        copysignf4( vec0.getY(), vec1.getY() ),
-        copysignf4( vec0.getZ(), vec1.getZ() ),
-        copysignf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        fmaxf4( vec0.getX(), vec1.getX() ),
-        fmaxf4( vec0.getY(), vec1.getY() ),
-        fmaxf4( vec0.getZ(), vec1.getZ() ),
-        fmaxf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline vec_float4 maxElem( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec.getX(), vec.getY() );
-    result = fmaxf4( vec.getZ(), result );
-    result = fmaxf4( vec.getW(), result );
-    return result;
-}
-
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        fminf4( vec0.getX(), vec1.getX() ),
-        fminf4( vec0.getY(), vec1.getY() ),
-        fminf4( vec0.getZ(), vec1.getZ() ),
-        fminf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline vec_float4 minElem( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = fminf4( vec.getX(), vec.getY() );
-    result = fminf4( vec.getZ(), result );
-    result = fminf4( vec.getW(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = spu_add( vec.getX(), vec.getY() );
-    result = spu_add( result, vec.getZ() );
-    result = spu_add( result, vec.getW() );
-    return result;
-}
-
-inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0.getX(), vec1.getX() );
-    result = spu_add( result, spu_mul( vec0.getY(), vec1.getY() ) );
-    result = spu_add( result, spu_mul( vec0.getZ(), vec1.getZ() ) );
-    result = spu_add( result, spu_mul( vec0.getW(), vec1.getW() ) );
-    return result;
-}
-
-inline vec_float4 lengthSqr( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = spu_mul( vec.getX(), vec.getX() );
-    result = spu_add( result, spu_mul( vec.getY(), vec.getY() ) );
-    result = spu_add( result, spu_mul( vec.getZ(), vec.getZ() ) );
-    result = spu_add( result, spu_mul( vec.getW(), vec.getW() ) );
-    return result;
-}
-
-inline vec_float4 length( const Vector4 & vec )
-{
-    return sqrtf4( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( const Vector4 & vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = rsqrtf4( lenSqr );
-    return Vector4(
-        spu_mul( vec.getX(), lenInv ),
-        spu_mul( vec.getY(), lenInv ),
-        spu_mul( vec.getZ(), lenInv ),
-        spu_mul( vec.getW(), lenInv )
-    );
-}
-
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 )
-{
-    return Vector4(
-        spu_sel( vec0.getX(), vec1.getX(), select1 ),
-        spu_sel( vec0.getY(), vec1.getY(), select1 ),
-        spu_sel( vec0.getZ(), vec1.getZ(), select1 ),
-        spu_sel( vec0.getW(), vec1.getW(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector4 & vec )
-{
-    Aos::Vector4 vec0, vec1, vec2, vec3;
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Vector4 & vec, const char * name )
-{
-    Aos::Vector4 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-inline Point3::Point3( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-}
-
-inline Point3::Point3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Point3::Point3( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-}
-
-inline Point3::Point3( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline Point3::Point3( Aos::Point3 pnt )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_float4 vec128 = pnt.get128();
-    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
-}
-
-inline Point3::Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( pnt0.get128(), pnt2.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( pnt1.get128(), pnt3.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( pnt0.get128(), pnt2.get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( pnt1.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZCWD );
-    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-}
-
-inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline void Point3::get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
-    result0 = Aos::Point3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_XAYB ) );
-    result1 = Aos::Point3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_ZBW0 ) );
-    result2 = Aos::Point3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_XCY0 ) );
-    result3 = Aos::Point3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_ZDW0 ) );
-}
-
-inline void loadXYZArray( Point3 & vec, const vec_float4 * threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
-    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
-    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
-    vec.setX( spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
-    vec.setY( spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
-    vec.setZ( spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
-}
-
-inline void storeXYZArray( const Point3 & vec, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = spu_shuffle( vec.getX(), vec.getY(), _VECTORMATH_SHUF_XAZC );
-    zxzx = spu_shuffle( vec.getZ(), vec.getX(), _VECTORMATH_SHUF_ZDXB );
-    yzyz = spu_shuffle( vec.getY(), vec.getZ(), _VECTORMATH_SHUF_YBWD );
-    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
-    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
-    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( pnt0, xyz0 );
-    storeXYZArray( pnt1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Point3 & Point3::operator =( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-    return *this;
-}
-
-inline Point3 & Point3::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Point3::getX( ) const
-{
-    return mX;
-}
-
-inline Point3 & Point3::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Point3::getY( ) const
-{
-    return mY;
-}
-
-inline Point3 & Point3::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Point3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Point3 & Point3::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Point3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Point3::vec_float4_t & Point3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Point3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Point3::operator -( const Point3 & pnt ) const
-{
-    return Vector3(
-        spu_sub( mX, pnt.mX ),
-        spu_sub( mY, pnt.mY ),
-        spu_sub( mZ, pnt.mZ )
-    );
-}
-
-inline const Point3 Point3::operator +( const Vector3 & vec ) const
-{
-    return Point3(
-        spu_add( mX, vec.getX() ),
-        spu_add( mY, vec.getY() ),
-        spu_add( mZ, vec.getZ() )
-    );
-}
-
-inline const Point3 Point3::operator -( const Vector3 & vec ) const
-{
-    return Point3(
-        spu_sub( mX, vec.getX() ),
-        spu_sub( mY, vec.getY() ),
-        spu_sub( mZ, vec.getZ() )
-    );
-}
-
-inline Point3 & Point3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        spu_mul( pnt0.getX(), pnt1.getX() ),
-        spu_mul( pnt0.getY(), pnt1.getY() ),
-        spu_mul( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        divf4( pnt0.getX(), pnt1.getX() ),
-        divf4( pnt0.getY(), pnt1.getY() ),
-        divf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 recipPerElem( const Point3 & pnt )
-{
-    return Point3(
-        recipf4( pnt.getX() ),
-        recipf4( pnt.getY() ),
-        recipf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 sqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        sqrtf4( pnt.getX() ),
-        sqrtf4( pnt.getY() ),
-        sqrtf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 rsqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        rsqrtf4( pnt.getX() ),
-        rsqrtf4( pnt.getY() ),
-        rsqrtf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 absPerElem( const Point3 & pnt )
-{
-    return Point3(
-        fabsf4( pnt.getX() ),
-        fabsf4( pnt.getY() ),
-        fabsf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        copysignf4( pnt0.getX(), pnt1.getX() ),
-        copysignf4( pnt0.getY(), pnt1.getY() ),
-        copysignf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        fmaxf4( pnt0.getX(), pnt1.getX() ),
-        fmaxf4( pnt0.getY(), pnt1.getY() ),
-        fmaxf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline vec_float4 maxElem( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( pnt.getX(), pnt.getY() );
-    result = fmaxf4( pnt.getZ(), result );
-    return result;
-}
-
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        fminf4( pnt0.getX(), pnt1.getX() ),
-        fminf4( pnt0.getY(), pnt1.getY() ),
-        fminf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline vec_float4 minElem( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = fminf4( pnt.getX(), pnt.getY() );
-    result = fminf4( pnt.getZ(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = spu_add( pnt.getX(), pnt.getY() );
-    result = spu_add( result, pnt.getZ() );
-    return result;
-}
-
-inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec )
-{
-    vec_float4 result;
-    result = spu_mul( pnt.getX(), unitVec.getX() );
-    result = spu_add( result, spu_mul( pnt.getY(), unitVec.getY() ) );
-    result = spu_add( result, spu_mul( pnt.getZ(), unitVec.getZ() ) );
-    return result;
-}
-
-inline vec_float4 distSqrFromOrigin( const Point3 & pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline vec_float4 distFromOrigin( const Point3 & pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 )
-{
-    return Point3(
-        spu_sel( pnt0.getX(), pnt1.getX(), select1 ),
-        spu_sel( pnt0.getY(), pnt1.getY(), select1 ),
-        spu_sel( pnt0.getZ(), pnt1.getZ(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Point3 & pnt )
-{
-    Aos::Point3 vec0, vec1, vec2, vec3;
-    pnt.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Point3 & pnt, const char * name )
-{
-    Aos::Point3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    pnt.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_CPP_H
+#define _VECTORMATH_VEC_SOA_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Soa {
+
+inline Vector3::Vector3( const Vector3 & vec )
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+}
+
+inline Vector3::Vector3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+}
+
+inline Vector3::Vector3( const Point3 & pnt )
+{
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+}
+
+inline Vector3::Vector3( vec_float4 scalar )
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+}
+
+inline Vector3::Vector3( Aos::Vector3 vec )
+{
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 vec128 = vec.get128();
+    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
+}
+
+inline Vector3::Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_XAYB );
+    tmp2 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_ZCWD );
+    tmp3 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_ZCWD );
+    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
+    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
+    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
+}
+
+inline const Vector3 Vector3::xAxis( )
+{
+    return Vector3( spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{
+    return Vector3( spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{
+    return Vector3( spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = recipf4( sinf4( angle ) );
+    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
+    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline void Vector3::get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const
+{
+    vec_float4 tmp0, tmp1;
+    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
+    result0 = Aos::Vector3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_XAYB ) );
+    result1 = Aos::Vector3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_ZBW0 ) );
+    result2 = Aos::Vector3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_XCY0 ) );
+    result3 = Aos::Vector3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_ZDW0 ) );
+}
+
+inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads )
+{
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
+    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
+    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
+    vec.setX( spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
+    vec.setY( spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
+    vec.setZ( spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
+}
+
+inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = spu_shuffle( vec.getX(), vec.getY(), _VECTORMATH_SHUF_XAZC );
+    zxzx = spu_shuffle( vec.getZ(), vec.getX(), _VECTORMATH_SHUF_ZDXB );
+    yzyz = spu_shuffle( vec.getY(), vec.getZ(), _VECTORMATH_SHUF_YBWD );
+    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
+    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
+    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    storeXYZArray( vec0, xyz0 );
+    storeXYZArray( vec1, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Vector3 & Vector3::operator =( const Vector3 & vec )
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( vec_float4 _x )
+{
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Vector3::getX( ) const
+{
+    return mX;
+}
+
+inline Vector3 & Vector3::setY( vec_float4 _y )
+{
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Vector3::getY( ) const
+{
+    return mY;
+}
+
+inline Vector3 & Vector3::setZ( vec_float4 _z )
+{
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Vector3::getZ( ) const
+{
+    return mZ;
+}
+
+inline Vector3 & Vector3::setElem( int idx, vec_float4 value )
+{
+    *(&mX + idx) = value;
+    return *this;
+}
+
+inline vec_float4 Vector3::getElem( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline Vector3::vec_float4_t & Vector3::operator []( int idx )
+{
+    return *(&mX + idx);
+}
+
+inline vec_float4 Vector3::operator []( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
+{
+    return Vector3(
+        spu_add( mX, vec.mX ),
+        spu_add( mY, vec.mY ),
+        spu_add( mZ, vec.mZ )
+    );
+}
+
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
+{
+    return Vector3(
+        spu_sub( mX, vec.mX ),
+        spu_sub( mY, vec.mY ),
+        spu_sub( mZ, vec.mZ )
+    );
+}
+
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const
+{
+    return Point3(
+        spu_add( mX, pnt.getX() ),
+        spu_add( mY, pnt.getY() ),
+        spu_add( mZ, pnt.getZ() )
+    );
+}
+
+inline const Vector3 Vector3::operator *( vec_float4 scalar ) const
+{
+    return Vector3(
+        spu_mul( mX, scalar ),
+        spu_mul( mY, scalar ),
+        spu_mul( mZ, scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator +=( const Vector3 & vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator -=( const Vector3 & vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator *=( vec_float4 scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator /( vec_float4 scalar ) const
+{
+    return Vector3(
+        divf4( mX, scalar ),
+        divf4( mY, scalar ),
+        divf4( mZ, scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator /=( vec_float4 scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator -( ) const
+{
+    return Vector3(
+        negatef4( mX ),
+        negatef4( mY ),
+        negatef4( mZ )
+    );
+}
+
+inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        spu_mul( vec0.getX(), vec1.getX() ),
+        spu_mul( vec0.getY(), vec1.getY() ),
+        spu_mul( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        divf4( vec0.getX(), vec1.getX() ),
+        divf4( vec0.getY(), vec1.getY() ),
+        divf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline const Vector3 recipPerElem( const Vector3 & vec )
+{
+    return Vector3(
+        recipf4( vec.getX() ),
+        recipf4( vec.getY() ),
+        recipf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 sqrtPerElem( const Vector3 & vec )
+{
+    return Vector3(
+        sqrtf4( vec.getX() ),
+        sqrtf4( vec.getY() ),
+        sqrtf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 rsqrtPerElem( const Vector3 & vec )
+{
+    return Vector3(
+        rsqrtf4( vec.getX() ),
+        rsqrtf4( vec.getY() ),
+        rsqrtf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 absPerElem( const Vector3 & vec )
+{
+    return Vector3(
+        fabsf4( vec.getX() ),
+        fabsf4( vec.getY() ),
+        fabsf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        copysignf4( vec0.getX(), vec1.getX() ),
+        copysignf4( vec0.getY(), vec1.getY() ),
+        copysignf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        fmaxf4( vec0.getX(), vec1.getX() ),
+        fmaxf4( vec0.getY(), vec1.getY() ),
+        fmaxf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline vec_float4 maxElem( const Vector3 & vec )
+{
+    vec_float4 result;
+    result = fmaxf4( vec.getX(), vec.getY() );
+    result = fmaxf4( vec.getZ(), result );
+    return result;
+}
+
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        fminf4( vec0.getX(), vec1.getX() ),
+        fminf4( vec0.getY(), vec1.getY() ),
+        fminf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline vec_float4 minElem( const Vector3 & vec )
+{
+    vec_float4 result;
+    result = fminf4( vec.getX(), vec.getY() );
+    result = fminf4( vec.getZ(), result );
+    return result;
+}
+
+inline vec_float4 sum( const Vector3 & vec )
+{
+    vec_float4 result;
+    result = spu_add( vec.getX(), vec.getY() );
+    result = spu_add( result, vec.getZ() );
+    return result;
+}
+
+inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    vec_float4 result;
+    result = spu_mul( vec0.getX(), vec1.getX() );
+    result = spu_add( result, spu_mul( vec0.getY(), vec1.getY() ) );
+    result = spu_add( result, spu_mul( vec0.getZ(), vec1.getZ() ) );
+    return result;
+}
+
+inline vec_float4 lengthSqr( const Vector3 & vec )
+{
+    vec_float4 result;
+    result = spu_mul( vec.getX(), vec.getX() );
+    result = spu_add( result, spu_mul( vec.getY(), vec.getY() ) );
+    result = spu_add( result, spu_mul( vec.getZ(), vec.getZ() ) );
+    return result;
+}
+
+inline vec_float4 length( const Vector3 & vec )
+{
+    return sqrtf4( lengthSqr( vec ) );
+}
+
+inline const Vector3 normalize( const Vector3 & vec )
+{
+    vec_float4 lenSqr, lenInv;
+    lenSqr = lengthSqr( vec );
+    lenInv = rsqrtf4( lenSqr );
+    return Vector3(
+        spu_mul( vec.getX(), lenInv ),
+        spu_mul( vec.getY(), lenInv ),
+        spu_mul( vec.getZ(), lenInv )
+    );
+}
+
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return Vector3(
+        spu_sub( spu_mul( vec0.getY(), vec1.getZ() ), spu_mul( vec0.getZ(), vec1.getY() ) ),
+        spu_sub( spu_mul( vec0.getZ(), vec1.getX() ), spu_mul( vec0.getX(), vec1.getZ() ) ),
+        spu_sub( spu_mul( vec0.getX(), vec1.getY() ), spu_mul( vec0.getY(), vec1.getX() ) )
+    );
+}
+
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 )
+{
+    return Vector3(
+        spu_sel( vec0.getX(), vec1.getX(), select1 ),
+        spu_sel( vec0.getY(), vec1.getY(), select1 ),
+        spu_sel( vec0.getZ(), vec1.getZ(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector3 & vec )
+{
+    Aos::Vector3 vec0, vec1, vec2, vec3;
+    vec.get4Aos( vec0, vec1, vec2, vec3 );
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+inline void print( const Vector3 & vec, const char * name )
+{
+    Aos::Vector3 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vec.get4Aos( vec0, vec1, vec2, vec3 );
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+#endif
+
+inline Vector4::Vector4( const Vector4 & vec )
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+}
+
+inline Vector4::Vector4( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Vector4::Vector4( const Vector3 & xyz, vec_float4 _w )
+{
+    this->setXYZ( xyz );
+    this->setW( _w );
+}
+
+inline Vector4::Vector4( const Vector3 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = spu_splats(0.0f);
+}
+
+inline Vector4::Vector4( const Point3 & pnt )
+{
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+    mW = spu_splats(1.0f);
+}
+
+inline Vector4::Vector4( const Quat & quat )
+{
+    mX = quat.getX();
+    mY = quat.getY();
+    mZ = quat.getZ();
+    mW = quat.getW();
+}
+
+inline Vector4::Vector4( vec_float4 scalar )
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline Vector4::Vector4( Aos::Vector4 vec )
+{
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
+    vec_float4 vec128 = vec.get128();
+    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
+    mW = spu_shuffle( vec128, vec128, shuffle_wwww );
+}
+
+inline Vector4::Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_XAYB );
+    tmp2 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_ZCWD );
+    tmp3 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_ZCWD );
+    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
+    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
+    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
+    mW = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
+}
+
+inline const Vector4 Vector4::xAxis( )
+{
+    return Vector4( spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{
+    return Vector4( spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{
+    return Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{
+    return Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
+{
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = recipf4( sinf4( angle ) );
+    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
+    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline void Vector4::get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_XAYB );
+    tmp2 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
+    tmp3 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_ZCWD );
+    result0 = Aos::Vector4( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
+    result1 = Aos::Vector4( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
+    result2 = Aos::Vector4( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
+    result3 = Aos::Vector4( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
+}
+
+inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads )
+{
+    Aos::Vector4 v0, v1, v2, v3;
+    vec.get4Aos( v0, v1, v2, v3 );
+    twoQuads[0] = _vmath2VfToHalfFloats(v0.get128(), v1.get128());
+    twoQuads[1] = _vmath2VfToHalfFloats(v2.get128(), v3.get128());
+}
+
+inline Vector4 & Vector4::operator =( const Vector4 & vec )
+{
+    this->mX = vec.mX; // member-wise copy of the four component quadwords
+    this->mY = vec.mY;
+    this->mZ = vec.mZ;
+    this->mW = vec.mW;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
+{
+    this->mX = vec.getX(); // x, y and z come from vec; mW is not touched
+    this->mY = vec.getY();
+    this->mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{
+    return Vector3( this->mX, this->mY, this->mZ ); // w is dropped
+}
+
+inline Vector4 & Vector4::setX( vec_float4 _x )
+{
+    this->mX = _x; // replace the x component quadword
+    return *this;  // returned so calls can be chained
+}
+
+inline vec_float4 Vector4::getX( ) const
+{
+    return this->mX; // x component quadword, returned by value
+}
+
+inline Vector4 & Vector4::setY( vec_float4 _y )
+{
+    this->mY = _y; // replace the y component quadword
+    return *this;  // returned so calls can be chained
+}
+
+inline vec_float4 Vector4::getY( ) const
+{
+    return this->mY; // y component quadword, returned by value
+}
+
+inline Vector4 & Vector4::setZ( vec_float4 _z )
+{
+    this->mZ = _z; // replace the z component quadword
+    return *this;  // returned so calls can be chained
+}
+
+inline vec_float4 Vector4::getZ( ) const
+{
+    return this->mZ; // z component quadword, returned by value
+}
+
+inline Vector4 & Vector4::setW( vec_float4 _w )
+{
+    this->mW = _w; // replace the w component quadword
+    return *this;  // returned so calls can be chained
+}
+
+inline vec_float4 Vector4::getW( ) const
+{
+    return this->mW; // w component quadword, returned by value
+}
+
+inline Vector4 & Vector4::setElem( int idx, vec_float4 value )
+{
+    ( &mX )[idx] = value; // indexes from mX, so it relies on the members being laid out consecutively; idx is not checked
+    return *this;
+}
+
+inline vec_float4 Vector4::getElem( int idx ) const
+{
+    return ( &mX )[idx]; // component selected by offset from mX; idx is not checked
+}
+
+inline Vector4::vec_float4_t & Vector4::operator []( int idx )
+{
+    return ( &mX )[idx]; // writable reference to the component at offset idx from mX; idx is not checked
+}
+
+inline vec_float4 Vector4::operator []( int idx ) const
+{
+    return ( &mX )[idx]; // copy of the component at offset idx from mX; idx is not checked
+}
+
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
+{
+    // Component-wise sum; each spu_add handles one component quadword.
+    const vec_float4 sumX = spu_add( mX, vec.mX );
+    const vec_float4 sumY = spu_add( mY, vec.mY );
+    const vec_float4 sumZ = spu_add( mZ, vec.mZ );
+    const vec_float4 sumW = spu_add( mW, vec.mW );
+    return Vector4( sumX, sumY, sumZ, sumW );
+}
+
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
+{
+    // Component-wise difference (this minus vec), one spu_sub per component quadword.
+    const vec_float4 diffX = spu_sub( mX, vec.mX );
+    const vec_float4 diffY = spu_sub( mY, vec.mY );
+    const vec_float4 diffZ = spu_sub( mZ, vec.mZ );
+    const vec_float4 diffW = spu_sub( mW, vec.mW );
+    return Vector4( diffX, diffY, diffZ, diffW );
+}
+
+inline const Vector4 Vector4::operator *( vec_float4 scalar ) const
+{
+    // Multiplies every component quadword by the same scalar quadword.
+    const vec_float4 scaledX = spu_mul( mX, scalar );
+    const vec_float4 scaledY = spu_mul( mY, scalar );
+    const vec_float4 scaledZ = spu_mul( mZ, scalar );
+    const vec_float4 scaledW = spu_mul( mW, scalar );
+    return Vector4( scaledX, scaledY, scaledZ, scaledW );
+}
+
+inline Vector4 & Vector4::operator +=( const Vector4 & vec )
+{
+    const Vector4 updated = *this + vec; // arithmetic is delegated to operator +
+    return ( *this = updated );
+}
+
+inline Vector4 & Vector4::operator -=( const Vector4 & vec )
+{
+    const Vector4 updated = *this - vec; // arithmetic is delegated to binary operator -
+    return ( *this = updated );
+}
+
+inline Vector4 & Vector4::operator *=( vec_float4 scalar )
+{
+    const Vector4 updated = *this * scalar; // arithmetic is delegated to operator *
+    return ( *this = updated );
+}
+
+inline const Vector4 Vector4::operator /( vec_float4 scalar ) const
+{
+    // Divides every component quadword by the same scalar quadword using divf4.
+    const vec_float4 quotX = divf4( mX, scalar );
+    const vec_float4 quotY = divf4( mY, scalar );
+    const vec_float4 quotZ = divf4( mZ, scalar );
+    const vec_float4 quotW = divf4( mW, scalar );
+    return Vector4( quotX, quotY, quotZ, quotW );
+}
+
+inline Vector4 & Vector4::operator /=( vec_float4 scalar )
+{
+    const Vector4 updated = *this / scalar; // arithmetic is delegated to operator /
+    return ( *this = updated );
+}
+
+inline const Vector4 Vector4::operator -( ) const
+{
+    // Applies negatef4 to each component quadword.
+    const vec_float4 negX = negatef4( mX );
+    const vec_float4 negY = negatef4( mY );
+    const vec_float4 negZ = negatef4( mZ );
+    const vec_float4 negW = negatef4( mW );
+    return Vector4( negX, negY, negZ, negW );
+}
+
+inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec )
+{
+    return vec.operator *( scalar ); // scalar-on-the-left form forwards to Vector4::operator *
+}
+
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    // Multiplies matching components of vec0 and vec1.
+    const vec_float4 prodX = spu_mul( vec0.getX(), vec1.getX() );
+    const vec_float4 prodY = spu_mul( vec0.getY(), vec1.getY() );
+    const vec_float4 prodZ = spu_mul( vec0.getZ(), vec1.getZ() );
+    const vec_float4 prodW = spu_mul( vec0.getW(), vec1.getW() );
+    return Vector4( prodX, prodY, prodZ, prodW );
+}
+
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    // Divides each component of vec0 by the matching component of vec1 using divf4.
+    const vec_float4 quotX = divf4( vec0.getX(), vec1.getX() );
+    const vec_float4 quotY = divf4( vec0.getY(), vec1.getY() );
+    const vec_float4 quotZ = divf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 quotW = divf4( vec0.getW(), vec1.getW() );
+    return Vector4( quotX, quotY, quotZ, quotW );
+}
+
+inline const Vector4 recipPerElem( const Vector4 & vec )
+{
+    // Applies recipf4 to each component quadword.
+    const vec_float4 recipX = recipf4( vec.getX() );
+    const vec_float4 recipY = recipf4( vec.getY() );
+    const vec_float4 recipZ = recipf4( vec.getZ() );
+    const vec_float4 recipW = recipf4( vec.getW() );
+    return Vector4( recipX, recipY, recipZ, recipW );
+}
+
+inline const Vector4 sqrtPerElem( const Vector4 & vec )
+{
+    // Applies sqrtf4 to each component quadword.
+    const vec_float4 rootX = sqrtf4( vec.getX() );
+    const vec_float4 rootY = sqrtf4( vec.getY() );
+    const vec_float4 rootZ = sqrtf4( vec.getZ() );
+    const vec_float4 rootW = sqrtf4( vec.getW() );
+    return Vector4( rootX, rootY, rootZ, rootW );
+}
+
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )
+{
+    // Applies rsqrtf4 to each component quadword.
+    const vec_float4 invRootX = rsqrtf4( vec.getX() );
+    const vec_float4 invRootY = rsqrtf4( vec.getY() );
+    const vec_float4 invRootZ = rsqrtf4( vec.getZ() );
+    const vec_float4 invRootW = rsqrtf4( vec.getW() );
+    return Vector4( invRootX, invRootY, invRootZ, invRootW );
+}
+
+inline const Vector4 absPerElem( const Vector4 & vec )
+{
+    // Applies fabsf4 to each component quadword.
+    const vec_float4 absX = fabsf4( vec.getX() );
+    const vec_float4 absY = fabsf4( vec.getY() );
+    const vec_float4 absZ = fabsf4( vec.getZ() );
+    const vec_float4 absW = fabsf4( vec.getW() );
+    return Vector4( absX, absY, absZ, absW );
+}
+
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    // Calls copysignf4( vec0 component, vec1 component ) for each of x, y, z, w.
+    const vec_float4 signedX = copysignf4( vec0.getX(), vec1.getX() );
+    const vec_float4 signedY = copysignf4( vec0.getY(), vec1.getY() );
+    const vec_float4 signedZ = copysignf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 signedW = copysignf4( vec0.getW(), vec1.getW() );
+    return Vector4( signedX, signedY, signedZ, signedW );
+}
+
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    // fmaxf4 of the matching components of vec0 and vec1.
+    const vec_float4 maxX = fmaxf4( vec0.getX(), vec1.getX() );
+    const vec_float4 maxY = fmaxf4( vec0.getY(), vec1.getY() );
+    const vec_float4 maxZ = fmaxf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 maxW = fmaxf4( vec0.getW(), vec1.getW() );
+    return Vector4( maxX, maxY, maxZ, maxW );
+}
+
+inline vec_float4 maxElem( const Vector4 & vec )
+{
+    // Folds fmaxf4 over x, y, z and w; the result holds one maximum per slot.
+    const vec_float4 maxXY = fmaxf4( vec.getX(), vec.getY() );
+    const vec_float4 maxXYZ = fmaxf4( vec.getZ(), maxXY );
+    const vec_float4 maxXYZW = fmaxf4( vec.getW(), maxXYZ );
+    return maxXYZW;
+}
+
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    // fminf4 of the matching components of vec0 and vec1.
+    const vec_float4 minX = fminf4( vec0.getX(), vec1.getX() );
+    const vec_float4 minY = fminf4( vec0.getY(), vec1.getY() );
+    const vec_float4 minZ = fminf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 minW = fminf4( vec0.getW(), vec1.getW() );
+    return Vector4( minX, minY, minZ, minW );
+}
+
+inline vec_float4 minElem( const Vector4 & vec )
+{
+    // Folds fminf4 over x, y, z and w; the result holds one minimum per slot.
+    const vec_float4 minXY = fminf4( vec.getX(), vec.getY() );
+    const vec_float4 minXYZ = fminf4( vec.getZ(), minXY );
+    const vec_float4 minXYZW = fminf4( vec.getW(), minXYZ );
+    return minXYZW;
+}
+
+inline vec_float4 sum( const Vector4 & vec )
+{
+    // Adds the components in the order ((x + y) + z) + w, per slot.
+    const vec_float4 sumXY = spu_add( vec.getX(), vec.getY() );
+    const vec_float4 sumXYZ = spu_add( sumXY, vec.getZ() );
+    const vec_float4 sumXYZW = spu_add( sumXYZ, vec.getW() );
+    return sumXYZW;
+}
+
+inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    // Accumulates the x, y, z and w products in that order, per slot.
+    const vec_float4 accX = spu_mul( vec0.getX(), vec1.getX() );
+    const vec_float4 accXY = spu_add( accX, spu_mul( vec0.getY(), vec1.getY() ) );
+    const vec_float4 accXYZ = spu_add( accXY, spu_mul( vec0.getZ(), vec1.getZ() ) );
+    const vec_float4 accXYZW = spu_add( accXYZ, spu_mul( vec0.getW(), vec1.getW() ) );
+    return accXYZW;
+}
+
+inline vec_float4 lengthSqr( const Vector4 & vec )
+{
+    // Accumulates the squares of x, y, z and w in that order, per slot.
+    const vec_float4 accX = spu_mul( vec.getX(), vec.getX() );
+    const vec_float4 accXY = spu_add( accX, spu_mul( vec.getY(), vec.getY() ) );
+    const vec_float4 accXYZ = spu_add( accXY, spu_mul( vec.getZ(), vec.getZ() ) );
+    const vec_float4 accXYZW = spu_add( accXYZ, spu_mul( vec.getW(), vec.getW() ) );
+    return accXYZW;
+}
+
+inline vec_float4 length( const Vector4 & vec )
+{
+    return ( sqrtf4( lengthSqr( vec ) ) ); // sqrtf4 of the squared length
+}
+
+inline const Vector4 normalize( const Vector4 & vec )
+{
+    // Scales every component by rsqrtf4 of the squared length; a zero-length input is not special-cased here.
+    const vec_float4 squaredLen = lengthSqr( vec );
+    const vec_float4 invLen = rsqrtf4( squaredLen );
+    return Vector4(
+        spu_mul( vec.getX(), invLen ),
+        spu_mul( vec.getY(), invLen ),
+        spu_mul( vec.getZ(), invLen ),
+        spu_mul( vec.getW(), invLen )
+    );
+}
+
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 )
+{
+    // The same select1 mask is passed to spu_sel for every component (vec0 first, vec1 second).
+    const vec_float4 pickedX = spu_sel( vec0.getX(), vec1.getX(), select1 );
+    const vec_float4 pickedY = spu_sel( vec0.getY(), vec1.getY(), select1 );
+    const vec_float4 pickedZ = spu_sel( vec0.getZ(), vec1.getZ(), select1 );
+    const vec_float4 pickedW = spu_sel( vec0.getW(), vec1.getW(), select1 );
+    return Vector4( pickedX, pickedY, pickedZ, pickedW );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector4 & vec )
+{
+    Aos::Vector4 slot0, slot1, slot2, slot3;
+    vec.get4Aos( slot0, slot1, slot2, slot3 ); // unpack, then print one AoS vector per slot
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Vector4 & vec, const char * name )
+{
+    printf( "%s:\n", name ); // label line first, then the four slots
+    Aos::Vector4 slot0, slot1, slot2, slot3;
+    vec.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+#endif
+
+inline Point3::Point3( const Point3 & pnt )
+{
+    this->mX = pnt.mX; // copy the three component quadwords of pnt
+    this->mY = pnt.mY;
+    this->mZ = pnt.mZ;
+}
+
+inline Point3::Point3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    this->mX = _x; // one quadword per component
+    this->mY = _y;
+    this->mZ = _z;
+}
+
+inline Point3::Point3( const Vector3 & vec )
+{
+    this->mX = vec.getX(); // components are taken from vec's getters
+    this->mY = vec.getY();
+    this->mZ = vec.getZ();
+}
+
+inline Point3::Point3( vec_float4 scalar )
+{
+    this->mX = scalar; // x, y and z all receive the same quadword
+    this->mY = scalar;
+    this->mZ = scalar;
+}
+
+inline Point3::Point3( Aos::Point3 pnt )
+{
+    const vec_float4 packed = pnt.get128();
+    const vec_uchar16 splatWord0 = (vec_uchar16)spu_splats((int)0x00010203); // bytes 0..3 repeated in every word
+    const vec_uchar16 splatWord1 = (vec_uchar16)spu_splats((int)0x04050607); // bytes 4..7 repeated in every word
+    const vec_uchar16 splatWord2 = (vec_uchar16)spu_splats((int)0x08090a0b); // bytes 8..11 repeated in every word
+    mX = spu_shuffle( packed, packed, splatWord0 );
+    mY = spu_shuffle( packed, packed, splatWord1 );
+    mZ = spu_shuffle( packed, packed, splatWord2 );
+}
+
+inline Point3::Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 )
+{
+    // Regroups four AoS points into per-component quadwords with two rounds of shuffles.
+    const vec_float4 mix02First = spu_shuffle( pnt0.get128(), pnt2.get128(), _VECTORMATH_SHUF_XAYB );
+    const vec_float4 mix13First = spu_shuffle( pnt1.get128(), pnt3.get128(), _VECTORMATH_SHUF_XAYB );
+    const vec_float4 mix02Second = spu_shuffle( pnt0.get128(), pnt2.get128(), _VECTORMATH_SHUF_ZCWD );
+    const vec_float4 mix13Second = spu_shuffle( pnt1.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZCWD );
+    mX = spu_shuffle( mix02First, mix13First, _VECTORMATH_SHUF_XAYB );
+    mY = spu_shuffle( mix02First, mix13First, _VECTORMATH_SHUF_ZCWD );
+    mZ = spu_shuffle( mix02Second, mix13Second, _VECTORMATH_SHUF_XAYB );
+}
+
+inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 )
+{
+    return pnt0 + ( pnt1 - pnt0 ) * t; // blend from pnt0 toward pnt1; t is applied as given, unclamped
+}
+
+inline void Point3::get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const
+{
+    // x and z are interleaved first; each output then merges in mY with its own shuffle mask.
+    const vec_float4 xzFirst = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
+    const vec_float4 xzSecond = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
+    result0 = Aos::Point3( spu_shuffle( xzFirst, mY, _VECTORMATH_SHUF_XAYB ) );
+    result1 = Aos::Point3( spu_shuffle( xzFirst, mY, _VECTORMATH_SHUF_ZBW0 ) );
+    result2 = Aos::Point3( spu_shuffle( xzSecond, mY, _VECTORMATH_SHUF_XCY0 ) );
+    result3 = Aos::Point3( spu_shuffle( xzSecond, mY, _VECTORMATH_SHUF_ZDW0 ) );
+}
+
+inline void loadXYZArray( Point3 & vec, const vec_float4 * threeQuads )
+{
+    // Reads threeQuads[0..2] and shuffles them into separate x, y and z quadwords, stored through vec's setters.
+    const vec_float4 xyzx = threeQuads[0];
+    const vec_float4 yzxy = threeQuads[1];
+    const vec_float4 zxyz = threeQuads[2];
+    const vec_float4 xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
+    const vec_float4 zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
+    const vec_float4 yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
+    vec.setX( spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
+    vec.setY( spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
+    vec.setZ( spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
+}
+
+inline void storeXYZArray( const Point3 & vec, vec_float4 * threeQuads )
+{
+    // Shuffles the x, y and z quadwords of vec into three output quadwords; all shuffles finish before any store.
+    const vec_float4 xyxy = spu_shuffle( vec.getX(), vec.getY(), _VECTORMATH_SHUF_XAZC );
+    const vec_float4 zxzx = spu_shuffle( vec.getZ(), vec.getX(), _VECTORMATH_SHUF_ZDXB );
+    const vec_float4 yzyz = spu_shuffle( vec.getY(), vec.getZ(), _VECTORMATH_SHUF_YBWD );
+    const vec_float4 xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
+    const vec_float4 yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
+    const vec_float4 zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
+    *threeQuads = xyzx;
+    *( threeQuads + 1 ) = yzxy;
+    *( threeQuads + 2 ) = zxyz;
+}
+
+inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads )
+{
+    vec_float4 packed0[3]; // pnt0 as written by storeXYZArray
+    vec_float4 packed1[3]; // pnt1 as written by storeXYZArray
+    storeXYZArray( pnt0, packed0 );
+    storeXYZArray( pnt1, packed1 );
+    *threeQuads = _vmath2VfToHalfFloats( packed0[0], packed0[1] );
+    *( threeQuads + 1 ) = _vmath2VfToHalfFloats( packed0[2], packed1[0] );
+    *( threeQuads + 2 ) = _vmath2VfToHalfFloats( packed1[1], packed1[2] );
+}
+
+inline Point3 & Point3::operator =( const Point3 & pnt )
+{
+    this->mX = pnt.mX; // member-wise copy of the three component quadwords
+    this->mY = pnt.mY;
+    this->mZ = pnt.mZ;
+    return *this;
+}
+
+inline Point3 & Point3::setX( vec_float4 _x )
+{
+    this->mX = _x; // replace the x component quadword
+    return *this;  // returned so calls can be chained
+}
+
+inline vec_float4 Point3::getX( ) const
+{
+    return this->mX; // x component quadword, returned by value
+}
+
+inline Point3 & Point3::setY( vec_float4 _y )
+{
+    this->mY = _y; // replace the y component quadword
+    return *this;  // returned so calls can be chained
+}
+
+inline vec_float4 Point3::getY( ) const
+{
+    return this->mY; // y component quadword, returned by value
+}
+
+inline Point3 & Point3::setZ( vec_float4 _z )
+{
+    this->mZ = _z; // replace the z component quadword
+    return *this;  // returned so calls can be chained
+}
+
+inline vec_float4 Point3::getZ( ) const
+{
+    return this->mZ; // z component quadword, returned by value
+}
+
+inline Point3 & Point3::setElem( int idx, vec_float4 value )
+{
+    ( &mX )[idx] = value; // indexes from mX, so it relies on the members being laid out consecutively; idx is not checked
+    return *this;
+}
+
+inline vec_float4 Point3::getElem( int idx ) const
+{
+    return ( &mX )[idx]; // component selected by offset from mX; idx is not checked
+}
+
+inline Point3::vec_float4_t & Point3::operator []( int idx )
+{
+    return ( &mX )[idx]; // writable reference to the component at offset idx from mX; idx is not checked
+}
+
+inline vec_float4 Point3::operator []( int idx ) const
+{
+    return ( &mX )[idx]; // copy of the component at offset idx from mX; idx is not checked
+}
+
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+    // Point minus point gives a Vector3: component-wise this minus pnt.
+    const vec_float4 diffX = spu_sub( mX, pnt.mX );
+    const vec_float4 diffY = spu_sub( mY, pnt.mY );
+    const vec_float4 diffZ = spu_sub( mZ, pnt.mZ );
+    return Vector3( diffX, diffY, diffZ );
+}
+
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+    // Point plus vector gives a Point3: component-wise sum.
+    const vec_float4 movedX = spu_add( mX, vec.getX() );
+    const vec_float4 movedY = spu_add( mY, vec.getY() );
+    const vec_float4 movedZ = spu_add( mZ, vec.getZ() );
+    return Point3( movedX, movedY, movedZ );
+}
+
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+    // Point minus vector gives a Point3: component-wise this minus vec.
+    const vec_float4 movedX = spu_sub( mX, vec.getX() );
+    const vec_float4 movedY = spu_sub( mY, vec.getY() );
+    const vec_float4 movedZ = spu_sub( mZ, vec.getZ() );
+    return Point3( movedX, movedY, movedZ );
+}
+
+inline Point3 & Point3::operator +=( const Vector3 & vec )
+{
+    const Point3 updated = *this + vec; // arithmetic is delegated to operator +
+    return ( *this = updated );
+}
+
+inline Point3 & Point3::operator -=( const Vector3 & vec )
+{
+    const Point3 updated = *this - vec; // arithmetic is delegated to operator -( const Vector3 & )
+    return ( *this = updated );
+}
+
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // Multiplies matching components of pnt0 and pnt1.
+    const vec_float4 prodX = spu_mul( pnt0.getX(), pnt1.getX() );
+    const vec_float4 prodY = spu_mul( pnt0.getY(), pnt1.getY() );
+    const vec_float4 prodZ = spu_mul( pnt0.getZ(), pnt1.getZ() );
+    return Point3( prodX, prodY, prodZ );
+}
+
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // Divides each component of pnt0 by the matching component of pnt1 using divf4.
+    const vec_float4 quotX = divf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 quotY = divf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 quotZ = divf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( quotX, quotY, quotZ );
+}
+
+inline const Point3 recipPerElem( const Point3 & pnt )
+{
+    // Applies recipf4 to each component quadword.
+    const vec_float4 recipX = recipf4( pnt.getX() );
+    const vec_float4 recipY = recipf4( pnt.getY() );
+    const vec_float4 recipZ = recipf4( pnt.getZ() );
+    return Point3( recipX, recipY, recipZ );
+}
+
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+    // Applies sqrtf4 to each component quadword.
+    const vec_float4 rootX = sqrtf4( pnt.getX() );
+    const vec_float4 rootY = sqrtf4( pnt.getY() );
+    const vec_float4 rootZ = sqrtf4( pnt.getZ() );
+    return Point3( rootX, rootY, rootZ );
+}
+
+inline const Point3 rsqrtPerElem( const Point3 & pnt )
+{
+    // Applies rsqrtf4 to each component quadword.
+    const vec_float4 invRootX = rsqrtf4( pnt.getX() );
+    const vec_float4 invRootY = rsqrtf4( pnt.getY() );
+    const vec_float4 invRootZ = rsqrtf4( pnt.getZ() );
+    return Point3( invRootX, invRootY, invRootZ );
+}
+
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+    // Applies fabsf4 to each component quadword.
+    const vec_float4 absX = fabsf4( pnt.getX() );
+    const vec_float4 absY = fabsf4( pnt.getY() );
+    const vec_float4 absZ = fabsf4( pnt.getZ() );
+    return Point3( absX, absY, absZ );
+}
+
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // Calls copysignf4( pnt0 component, pnt1 component ) for each of x, y, z.
+    const vec_float4 signedX = copysignf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 signedY = copysignf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 signedZ = copysignf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( signedX, signedY, signedZ );
+}
+
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // fmaxf4 of the matching components of pnt0 and pnt1.
+    const vec_float4 maxX = fmaxf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 maxY = fmaxf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 maxZ = fmaxf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( maxX, maxY, maxZ );
+}
+
+inline vec_float4 maxElem( const Point3 & pnt )
+{
+    // Folds fmaxf4 over x, y and z; the result holds one maximum per slot.
+    const vec_float4 maxXY = fmaxf4( pnt.getX(), pnt.getY() );
+    const vec_float4 maxXYZ = fmaxf4( pnt.getZ(), maxXY );
+    return maxXYZ;
+}
+
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // fminf4 of the matching components of pnt0 and pnt1.
+    const vec_float4 minX = fminf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 minY = fminf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 minZ = fminf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( minX, minY, minZ );
+}
+
+inline vec_float4 minElem( const Point3 & pnt )
+{
+    // Folds fminf4 over x, y and z; the result holds one minimum per slot.
+    const vec_float4 minXY = fminf4( pnt.getX(), pnt.getY() );
+    const vec_float4 minXYZ = fminf4( pnt.getZ(), minXY );
+    return minXYZ;
+}
+
+inline vec_float4 sum( const Point3 & pnt )
+{
+    // Adds the components in the order (x + y) + z, per slot.
+    const vec_float4 sumXY = spu_add( pnt.getX(), pnt.getY() );
+    const vec_float4 sumXYZ = spu_add( sumXY, pnt.getZ() );
+    return sumXYZ;
+}
+
+inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal )
+{
+    return ( mulPerElem( pnt, Point3( scaleVal ) ) ); // scaleVal is copied to x, y and z, then multiplied per component
+}
+
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
+{
+    return ( mulPerElem( pnt, Point3( scaleVec ) ) ); // scaleVec is converted to a Point3, then multiplied per component
+}
+
+inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec )
+{
+    // Accumulates the x, y and z products of pnt and unitVec in that order, per slot.
+    const vec_float4 accX = spu_mul( pnt.getX(), unitVec.getX() );
+    const vec_float4 accXY = spu_add( accX, spu_mul( pnt.getY(), unitVec.getY() ) );
+    const vec_float4 accXYZ = spu_add( accXY, spu_mul( pnt.getZ(), unitVec.getZ() ) );
+    return accXYZ;
+}
+
+inline vec_float4 distSqrFromOrigin( const Point3 & pnt )
+{
+    return ( lengthSqr( Vector3( pnt ) ) ); // pnt converted to a Vector3, then its squared length
+}
+
+inline vec_float4 distFromOrigin( const Point3 & pnt )
+{
+    return ( length( Vector3( pnt ) ) ); // pnt converted to a Vector3, then its length
+}
+
+inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    return lengthSqr( pnt1 - pnt0 ); // squared length of the Vector3 from pnt0 to pnt1
+}
+
+inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    return length( pnt1 - pnt0 ); // length of the Vector3 from pnt0 to pnt1
+}
+
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 )
+{
+    // The same select1 mask is passed to spu_sel for every component (pnt0 first, pnt1 second).
+    const vec_float4 pickedX = spu_sel( pnt0.getX(), pnt1.getX(), select1 );
+    const vec_float4 pickedY = spu_sel( pnt0.getY(), pnt1.getY(), select1 );
+    const vec_float4 pickedZ = spu_sel( pnt0.getZ(), pnt1.getZ(), select1 );
+    return Point3( pickedX, pickedY, pickedZ );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Point3 & pnt )
+{
+    Aos::Point3 slot0, slot1, slot2, slot3;
+    pnt.get4Aos( slot0, slot1, slot2, slot3 ); // unpack, then print one AoS point per slot
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Point3 & pnt, const char * name )
+{
+    printf( "%s:\n", name ); // label line first, then the four slots
+    Aos::Point3 slot0, slot1, slot2, slot3;
+    pnt.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vecidx_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vecidx_aos.h
index e46578ad0..f5309153d 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vecidx_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vecidx_aos.h
@@ -1,64 +1,64 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VECIDX_AOS_H
-#define _VECTORMATH_VECIDX_AOS_H
-
-#include <spu_intrinsics.h>
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// VecIdx 
-// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
-// subscripting operator.
-//
-
-class VecIdx
-{
-private:
-    typedef vec_float4 vec_float4_t;
-    vec_float4_t &ref __attribute__ ((aligned(16)));
-    int i __attribute__ ((aligned(16)));
-public:
-    inline VecIdx( vec_float4& vec, int idx ): ref(vec) { i = idx; }
-    inline operator float() const;
-    inline float operator =( float scalar );
-    inline float operator =( const VecIdx& scalar );
-    inline float operator *=( float scalar );
-    inline float operator /=( float scalar );
-    inline float operator +=( float scalar );
-    inline float operator -=( float scalar );
-};
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VECIDX_AOS_H
+#define _VECTORMATH_VECIDX_AOS_H
+
+#include <spu_intrinsics.h>
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// VecIdx 
+// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
+// subscripting operator.
+//
+
+class VecIdx
+{
+private:
+    typedef vec_float4 vec_float4_t;
+    vec_float4_t &ref __attribute__ ((aligned(16)));   // the quadword passed to the constructor
+    int i __attribute__ ((aligned(16)));               // the element index passed to the constructor
+public:
+    inline VecIdx( vec_float4& vec, int idx ): ref(vec), i(idx) { }
+    inline operator float() const;                     // the members below are only declared in this header
+    inline float operator =( float scalar );
+    inline float operator =( const VecIdx& scalar );
+    inline float operator *=( float scalar );
+    inline float operator /=( float scalar );
+    inline float operator +=( float scalar );
+    inline float operator -=( float scalar );
+};
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_aos.h
index 5fcd55c2e..f876c5382 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_aos.h
@@ -1,1851 +1,1851 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_CPP_SPU_H
-#define _VECTORMATH_AOS_CPP_SPU_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "floatInVec.h"
-#include "boolInVec.h"
-#include "vecidx_aos.h"
-#include <stdio.h>
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-namespace Vectormath {
-
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A 3-D vector in array-of-structures format
-//
-class Vector3
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( float x, float y, float z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( Point3 pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( float scalar );
-
-    // Set vector float data in a 3-D vector
-    // 
-    explicit inline Vector3( vec_float4 vf4 );
-
-    // Get vector float data from a 3-D vector
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( Vector3 vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( float x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( float y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( float z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( Vector3 vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( Vector3 vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( Point3 pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( float scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( Vector3 vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( Vector3 vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( float scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( float scalar, Vector3 vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( Vector3 vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( Vector3 vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( Vector3 vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( Vector3 vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline float maxElem( Vector3 vec );
-
-// Minimum element of a 3-D vector
-// 
-inline float minElem( Vector3 vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline float sum( Vector3 vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline float dot( Vector3 vec0, Vector3 vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline float lengthSqr( Vector3 vec );
-
-// Compute the length of a 3-D vector
-// 
-inline float length( Vector3 vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( Vector3 vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( Vector3 vec0, Vector3 vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( Vector3 vec0, Vector3 vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// NOTE: 
-// Slower than column post-multiply.
-// 
-inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( Vector3 vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 );
-
-// Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
-// The value of the fourth word (the word with the highest address) remains unchanged
-// 
-inline void storeXYZ( Vector3 vec, vec_float4 * quad );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads );
-
-// Store four 3-D vectors in three quadwords
-// 
-inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads );
-
-// Store eight 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector3 vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector3 vec, const char * name );
-
-#endif
-
-// A 4-D vector in array-of-structures format
-//
-class Vector4
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( float x, float y, float z, float w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( Vector3 xyz, float w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( Vector3 vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( Point3 pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( Quat quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( float scalar );
-
-    // Set vector float data in a 4-D vector
-    // 
-    explicit inline Vector4( vec_float4 vf4 );
-
-    // Get vector float data from a 4-D vector
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( Vector4 vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( Vector3 vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( float x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( float y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( float z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( float w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( Vector4 vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( Vector4 vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( float scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( Vector4 vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( Vector4 vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( float scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( float scalar, Vector4 vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( Vector4 vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( Vector4 vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( Vector4 vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( Vector4 vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline float maxElem( Vector4 vec );
-
-// Minimum element of a 4-D vector
-// 
-inline float minElem( Vector4 vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline float sum( Vector4 vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline float dot( Vector4 vec0, Vector4 vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline float lengthSqr( Vector4 vec );
-
-// Compute the length of a 4-D vector
-// 
-inline float length( Vector4 vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( Vector4 vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( Vector4 vec0, Vector4 vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 );
-
-// Store four 4-D vectors as half-floats
-// 
-inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector4 vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector4 vec, const char * name );
-
-#endif
-
-// A 3-D point in array-of-structures format
-//
-class Point3
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( float x, float y, float z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( Vector3 vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( float scalar );
-
-    // Set vector float data in a 3-D point
-    // 
-    explicit inline Point3( vec_float4 vf4 );
-
-    // Get vector float data from a 3-D point
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( Point3 pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( float x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( float y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( float z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( Point3 pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( Vector3 vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( Vector3 vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( Vector3 vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( Vector3 vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( Point3 pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( Point3 pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( Point3 pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( Point3 pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline float maxElem( Point3 pnt );
-
-// Minimum element of a 3-D point
-// 
-inline float minElem( Point3 pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline float sum( Point3 pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( Point3 pnt, float scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( Point3 pnt, Vector3 scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline float projection( Point3 pnt, Vector3 unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distSqrFromOrigin( Point3 pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distFromOrigin( Point3 pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline float distSqr( Point3 pnt0, Point3 pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline float dist( Point3 pnt0, Point3 pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 );
-
-// Store x, y, and z elements of a 3-D point in the first three words of a quadword.
-// The value of the fourth word (the word with the highest address) remains unchanged
-// 
-inline void storeXYZ( Point3 pnt, vec_float4 * quad );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads );
-
-// Store four 3-D points in three quadwords
-// 
-inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads );
-
-// Store eight 3-D points as half-floats
-// 
-inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Point3 pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Point3 pnt, const char * name );
-
-#endif
-
-// A quaternion in array-of-structures format
-//
-class Quat
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( float x, float y, float z, float w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( Vector3 xyz, float w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( Vector4 vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( float scalar );
-
-    // Set vector float data in a quaternion
-    // 
-    explicit inline Quat( vec_float4 vf4 );
-
-    // Get vector float data from a quaternion
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( Quat quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( Vector3 vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( float x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( float y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( float z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( float w );
-
-    // Get the x element of a quaternion
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( Quat quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( Quat quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( Quat quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( float scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( Quat quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( Quat quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( Quat quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( float scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( Vector3 unitVec0, Vector3 unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( float radians, Vector3 unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( float radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( float radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( float radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( float scalar, Quat quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( Quat quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( Quat unitQuat, Vector3 vec );
-
-// Compute the dot product of two quaternions
-// 
-inline float dot( Quat quat0, Quat quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline float norm( Quat quat );
-
-// Compute the length of a quaternion
-// 
-inline float length( Quat quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( Quat quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( float t, Quat quat0, Quat quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Quat select( Quat quat0, Quat quat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Quat quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Quat quat, const char * name );
-
-#endif
-
-// A 3x3 matrix in array-of-structures format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( Vector3 col0, Vector3 col1, Vector3 col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( Quat unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( float scalar );
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( Vector3 col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( Vector3 col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( Vector3 col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, Vector3 vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, Vector3 vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( float scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( Vector3 vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( float radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( float radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( float radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( float radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( Quat unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( Vector3 scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline float determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A 4x4 matrix in array-of-structures format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( Vector4 col0, Vector4 col1, Vector4 col2, Vector4 col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, Vector3 translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( Quat unitQuat, Vector3 translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( float scalar );
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( Vector3 translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( Vector4 col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( Vector4 col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( Vector4 col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( Vector4 col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, Vector4 vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, Vector4 vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, float val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( float scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( Vector4 vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( Vector3 vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( Point3 pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( float radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( float radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( float radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( float radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( Quat unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( Vector3 scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( Vector3 translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline float determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A 3x4 transformation matrix in array-of-structures format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( Vector3 col0, Vector3 col1, Vector3 col2, Vector3 col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, Vector3 translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( Quat unitQuat, Vector3 translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( float scalar );
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( Vector3 translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( Vector3 col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( Vector3 col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( Vector3 col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( Vector3 col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, Vector3 vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, Vector4 vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( Vector3 vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( Point3 pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( float radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( Quat unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( Vector3 scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( Vector3 translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_SPU_H
+#define _VECTORMATH_AOS_CPP_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include "floatInVec.h"
+#include "boolInVec.h"
+#include "vecidx_aos.h"
+#include <stdio.h>
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// Storage is a single vec_float4 (mVec128); every member below is declared inline.
+class Vector3
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mVec128 is not assigned.
+    inline Vector3( ) { };
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Explicit: a Point3 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( Point3 pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Explicit: a float is never converted to a Vector3 implicitly.
+    explicit inline Vector3( float scalar );
+
+    // Set vector float data in a 3-D vector
+    // Explicit: a vec_float4 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( vec_float4 vf4 );
+
+    // Get vector float data from a 3-D vector
+    // Returns the vec_float4 by value; const member, so the vector is not modified.
+    inline vec_float4 get128( ) const;
+
+    // Assign one 3-D vector to another
+    // Takes vec by value; returns a Vector3 reference (presumably *this -- confirm in the definition).
+    inline Vector3 & operator =( Vector3 vec );
+
+    // Set the x element of a 3-D vector
+    // setX/setY/setZ each return a Vector3 reference (presumably *this -- confirm in the definition).
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( float z );
+
+    // Get the x element of a 3-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): the idx-to-element mapping and the handling of other idx values are not visible here.
+    inline Vector3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object by value, not a float reference (VecIdx presumably comes from vecidx_aos.h).
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element as a float by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    inline const Vector3 operator +( Vector3 vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( Vector3 vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3, not Vector3.
+    inline const Point3 operator +( Point3 pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // NOTE(review): behavior for scalar == 0 is not stated in this header.
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Vector3 & operator +=( Vector3 vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( Vector3 vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( float scalar );
+
+    // Negate all elements of a 3-D vector
+    // Unary minus; returns a new Vector3 and leaves this one unmodified (const member).
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // Static; returns a Vector3 by value (presumably (1, 0, 0) -- confirm in the definition).
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // Static; returns a Vector3 by value (presumably (0, 1, 0) -- confirm in the definition).
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // Static; returns a Vector3 by value (presumably (0, 0, 1) -- confirm in the definition).
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// 
+inline const Vector3 operator *( float scalar, Vector3 vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( Vector3 vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( Vector3 vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( Vector3 vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( Vector3 vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Maximum element of a 3-D vector
+// 
+inline float maxElem( Vector3 vec );
+
+// Minimum element of a 3-D vector
+// 
+inline float minElem( Vector3 vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline float sum( Vector3 vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline float dot( Vector3 vec0, Vector3 vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline float lengthSqr( Vector3 vec );
+
+// Compute the length of a 3-D vector
+// 
+inline float length( Vector3 vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( Vector3 vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( Vector3 vec0, Vector3 vec1 );
+
+// Outer product of two 3-D vectors
+// 
+inline const Matrix3 outer( Vector3 vec0, Vector3 vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// NOTE: 
+// Slower than column post-multiply.
+// 
+inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( Vector3 vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// Both inputs are expected to be unit length, as the unitVec0/unitVec1 parameter names indicate.
+inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 );
+
+// Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+// The value of the fourth word (the word with the highest address) remains unchanged.
+// 
+inline void storeXYZ( Vector3 vec, vec_float4 * quad );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// vec0..vec3 are output references; threeQuads is the read-only input.
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads );
+
+// Store four 3-D vectors in three quadwords
+// 
+inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads );
+
+// Store eight 3-D vectors as half-floats
+// Output is written through the vec_ushort8 pointer threeQuads (three quadwords, per its name).
+inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector3 vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector3 vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// Storage is a single vec_float4 (mVec128); every member below is declared inline.
+class Vector4
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mVec128 is not assigned.
+    inline Vector4( ) { };
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // xyz supplies the x, y, and z elements; w supplies the w element (per the parameter names).
+    inline Vector4( Vector3 xyz, float w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Explicit: a Vector3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( Vector3 vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Explicit: a Point3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( Point3 pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Explicit: a Quat is never converted to a Vector4 implicitly.
+    explicit inline Vector4( Quat quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Explicit: a float is never converted to a Vector4 implicitly.
+    explicit inline Vector4( float scalar );
+
+    // Set vector float data in a 4-D vector
+    // Explicit: a vec_float4 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( vec_float4 vf4 );
+
+    // Get vector float data from a 4-D vector
+    // Returns the vec_float4 by value; const member, so the vector is not modified.
+    inline vec_float4 get128( ) const;
+
+    // Assign one 4-D vector to another
+    // Takes vec by value; returns a Vector4 reference (presumably *this -- confirm in the definition).
+    inline Vector4 & operator =( Vector4 vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( Vector3 vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // Returns them as a Vector3 by value.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // setX/setY/setZ/setW each return a Vector4 reference (presumably *this -- confirm in the definition).
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( float w );
+
+    // Get the x element of a 4-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): the idx-to-element mapping and the handling of other idx values are not visible here.
+    inline Vector4 & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object by value, not a float reference (VecIdx presumably comes from vecidx_aos.h).
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element as a float by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    inline const Vector4 operator +( Vector4 vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( Vector4 vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // NOTE(review): behavior for scalar == 0 is not stated in this header.
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    inline Vector4 & operator +=( Vector4 vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( Vector4 vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( float scalar );
+
+    // Negate all elements of a 4-D vector
+    // Unary minus; returns a new Vector4 and leaves this one unmodified (const member).
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // Static; returns a Vector4 by value (presumably (1, 0, 0, 0) -- confirm in the definition).
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // Static; returns a Vector4 by value (presumably (0, 1, 0, 0) -- confirm in the definition).
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // Static; returns a Vector4 by value (presumably (0, 0, 1, 0) -- confirm in the definition).
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // Static; returns a Vector4 by value (presumably (0, 0, 0, 1) -- confirm in the definition).
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// Non-member overload that takes the scalar as the left-hand operand.
+inline const Vector4 operator *( float scalar, Vector4 vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( Vector4 vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( Vector4 vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( Vector4 vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( Vector4 vec );
+
+// Copy sign from one 4-D vector to another, per element
+// NOTE(review): which argument supplies the sign is not visible here; confirm against the implementation.
+inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Maximum element of a 4-D vector
+// 
+inline float maxElem( Vector4 vec );
+
+// Minimum element of a 4-D vector
+// 
+inline float minElem( Vector4 vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline float sum( Vector4 vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline float dot( Vector4 vec0, Vector4 vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline float lengthSqr( Vector4 vec );
+
+// Compute the length of a 4-D vector
+// 
+inline float length( Vector4 vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( Vector4 vec );
+
+// Outer product of two 4-D vectors
+// The result is returned as a Matrix4.
+inline const Matrix4 outer( Vector4 vec0, Vector4 vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// Presumably vec1 is returned when select1 is true; TODO confirm against the implementation.
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 );
+
+// Store four 4-D vectors as half-floats
+// The destination is the vec_ushort8 pointer twoQuads.
+inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector4 vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector4 vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// The only data member is a single vec_float4 (mVec128).
+class Point3
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Point3( ) { };
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( float x, float y, float z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit, so a Vector3 is never converted implicitly.
+    explicit inline Point3( Vector3 vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit, so a float is never converted implicitly.
+    explicit inline Point3( float scalar );
+
+    // Set vector float data in a 3-D point
+    // Declared explicit, so a vec_float4 is never converted implicitly.
+    explicit inline Point3( vec_float4 vf4 );
+
+    // Get vector float data from a 3-D point
+    // 
+    inline vec_float4 get128( ) const;
+
+    // Assign one 3-D point to another
+    // The argument is taken by value; returns a reference to this type.
+    inline Point3 & operator =( Point3 pnt );
+
+    // Set the x element of a 3-D point
+    // 
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( float z );
+
+    // Get the x element of a 3-D point
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // 
+    inline Point3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload; returns a VecIdx object by value rather than a float reference.
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element as a float by value.
+    inline float operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The difference is returned as a Vector3, not a Point3.
+    inline const Vector3 operator -( Point3 pnt ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result is returned as a Point3.
+    inline const Point3 operator +( Vector3 vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result is returned as a Point3.
+    inline const Point3 operator -( Vector3 vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Point3 & operator +=( Vector3 vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( Vector3 vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( Point3 pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( Point3 pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( Point3 pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( Point3 pnt );
+
+// Copy sign from one 3-D point to another, per element
+// NOTE(review): which argument supplies the sign is not visible here; confirm against the implementation.
+inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Maximum element of a 3-D point
+// 
+inline float maxElem( Point3 pnt );
+
+// Minimum element of a 3-D point
+// 
+inline float minElem( Point3 pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline float sum( Point3 pnt );
+
+// Apply uniform scale to a 3-D point
+// Overload taking a single float scale factor.
+inline const Point3 scale( Point3 pnt, float scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// Overload taking a Vector3 of scale factors.
+inline const Point3 scale( Point3 pnt, Vector3 scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline float projection( Point3 pnt, Vector3 unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distSqrFromOrigin( Point3 pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distFromOrigin( Point3 pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline float distSqr( Point3 pnt0, Point3 pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline float dist( Point3 pnt0, Point3 pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// Presumably pnt1 is returned when select1 is true; TODO confirm against the implementation.
+inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 );
+
+// Store the x, y, and z elements of a 3-D point in the first three words of a quadword.
+// The value of the fourth word (the word with the highest address) remains unchanged.
+// 
+inline void storeXYZ( Point3 pnt, vec_float4 * quad );
+
+// Load four three-float 3-D points, stored in three quadwords
+// Results go to the pnt0..pnt3 reference parameters; threeQuads is a pointer to const data.
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads );
+
+// Store four 3-D points in three quadwords
+// The destination is the vec_float4 pointer threeQuads.
+inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads );
+
+// Store eight 3-D points as half-floats
+// The destination is the vec_ushort8 pointer threeQuads.
+inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Point3 pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Point3 pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+// The only data member is a single vec_float4 (mVec128).
+class Quat
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Quat( ) { };
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // The vector parameter is named xyz and the scalar parameter is named w.
+    inline Quat( Vector3 xyz, float w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Declared explicit, so a Vector4 is never converted implicitly.
+    explicit inline Quat( Vector4 vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Declared explicit, so a Matrix3 is never converted implicitly.
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Declared explicit, so a float is never converted implicitly.
+    explicit inline Quat( float scalar );
+
+    // Set vector float data in a quaternion
+    // Declared explicit, so a vec_float4 is never converted implicitly.
+    explicit inline Quat( vec_float4 vf4 );
+
+    // Get vector float data from a quaternion
+    // 
+    inline vec_float4 get128( ) const;
+
+    // Assign one quaternion to another
+    // The argument is taken by value; returns a reference to this type.
+    inline Quat & operator =( Quat quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( Vector3 vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // The elements are returned together as a Vector3.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( float w );
+
+    // Get the x element of a quaternion
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // 
+    inline Quat & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload; returns a VecIdx object by value rather than a float reference.
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element as a float by value.
+    inline float operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( Quat quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( Quat quat ) const;
+
+    // Multiply two quaternions
+    // Overload taking a Quat right-hand operand.
+    inline const Quat operator *( Quat quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // Overload taking a float right-hand operand.
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( Quat quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( Quat quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( Quat quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( float scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( Vector3 unitVec0, Vector3 unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // Overload taking an angle in radians followed by the axis vector.
+    static inline const Quat rotation( float radians, Vector3 unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( float radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// Non-member overload that takes the scalar as the left-hand operand.
+inline const Quat operator *( float scalar, Quat quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( Quat quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// The rotated vector is returned as a Vector3.
+inline const Vector3 rotate( Quat unitQuat, Vector3 vec );
+
+// Compute the dot product of two quaternions
+// 
+inline float dot( Quat quat0, Quat quat1 );
+
+// Compute the norm of a quaternion
+// NOTE(review): length() is declared separately below; confirm whether norm() differs (e.g. is the squared length).
+inline float norm( Quat quat );
+
+// Compute the length of a quaternion
+// 
+inline float length( Quat quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( Quat quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( float t, Quat quat0, Quat quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 );
+
+// Spherical quadrangle interpolation
+// Takes the parameter t and four quaternions whose names mark them as unit-length.
+inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// Presumably quat1 is returned when select1 is true; TODO confirm against the implementation.
+inline const Quat select( Quat quat0, Quat quat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Quat quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Quat quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// The data members are three Vector3 columns (mCol0, mCol1, mCol2).
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( Vector3 col0, Vector3 col1, Vector3 col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // Declared explicit, so a Quat is never converted implicitly.
+    explicit inline Matrix3( Quat unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // Declared explicit, so a float is never converted implicitly.
+    explicit inline Matrix3( float scalar );
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol0( Vector3 col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol1( Vector3 col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol2( Vector3 col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setCol( int col, Vector3 vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setRow( int row, Vector3 vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Non-const overload; the return type is Vector3 &.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload; the column is returned as a Vector3 by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // The column index is the first argument and the row index is the second.
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // The column index is the first argument and the row index is the second.
+    inline float getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // The product is returned as a Vector3.
+    inline const Vector3 operator *( Vector3 vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the angles arrive as one Vector3 (radiansXYZ); confirm the order in which the rotations are applied.
+    static inline const Matrix3 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // Overload taking an angle in radians followed by the axis vector.
+    static inline const Matrix3 rotation( float radians, Vector3 unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // Overload taking a Quat.
+    static inline const Matrix3 rotation( Quat unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // 
+    static inline const Matrix3 scale( Vector3 scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// Non-member overload that takes the scalar as the left-hand operand.
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Unlike appendScale, the scale vector is the first argument here.
+inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// 
+inline float determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// Presumably mat1 is returned when select1 is true; TODO confirm against the implementation.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// The data members are four Vector4 columns (mCol0, mCol1, mCol2, mCol3).
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( Vector4 col0, Vector4 col1, Vector4 col2, Vector4 col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Declared explicit, so a Transform3 is never converted implicitly.
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // The vector parameter is named translateVec.
+    inline Matrix4( const Matrix3 & mat, Vector3 translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // The vector parameter is named translateVec.
+    inline Matrix4( Quat unitQuat, Vector3 translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // Declared explicit, so a float is never converted implicitly.
+    explicit inline Matrix4( float scalar );
+
+    // Assign one 4x4 matrix to another
+    // 
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // The submatrix is returned as a Matrix3.
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( Vector3 translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // The translation is returned as a Vector3.
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol0( Vector4 col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol1( Vector4 col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol2( Vector4 col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol3( Vector4 col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setCol( int col, Vector4 vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setRow( int row, Vector4 vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Non-const overload; the return type is Vector4 &.
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload; the column is returned as a Vector4 by value.
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // The column index is the first argument and the row index is the second.
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // The column index is the first argument and the row index is the second.
+    inline float getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    inline const Vector4 operator *( Vector4 vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // The product is returned as a Vector4, not a Vector3.
+    inline const Vector4 operator *( Vector3 vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // The product is returned as a Vector4, not a Point3.
+    inline const Vector4 operator *( Point3 pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // The product is returned as a Matrix4.
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // 
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the angles arrive as one Vector3 (radiansXYZ); confirm the order in which the rotations are applied.
+    static inline const Matrix4 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // Overload taking an angle in radians followed by the axis vector.
+    static inline const Matrix4 rotation( float radians, Vector3 unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // Overload taking a Quat.
+    static inline const Matrix4 rotation( Quat unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static inline const Matrix4 scale( Vector3 scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( Vector3 translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    // The two positions are Point3 values; the up direction is a Vector3.
+    static inline const Matrix4 lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec );
+
+    // Construct a perspective projection matrix
+    // The field-of-view parameter is named fovyRadians.
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// Non-member overload that takes the scalar as the left-hand operand.
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Unlike appendScale, the scale vector is the first argument here.
+inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// 
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// 
+inline float determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// Presumably mat1 is returned when select1 is true; TODO confirm against the implementation.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format,
+// stored as four 3-D column vectors (mCol0 through mCol3)
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( Vector3 col0, Vector3 col1, Vector3 col2, Vector3 col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Transform3( const Matrix3 & tfrm, Vector3 translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( Quat unitQuat, Vector3 translateVec );
+
+    // Construct a 3x4 transformation matrix with all elements set to the same scalar value
+    // (declared explicit, so a float is never implicitly converted to a Transform3)
+    explicit inline Transform3( float scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set the translation component of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setTranslation( Vector3 translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( Vector3 col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( Vector3 col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( Vector3 col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( Vector3 col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setCol( int col, Vector3 vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // (a row spans all four columns, so it is passed as a 4-D vector)
+    inline Transform3 & setRow( int row, Vector4 vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // (a row spans all four columns, so it is returned as a 4-D vector)
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // (non-const overload; returns a modifiable reference to the column)
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // (const overload; returns the column by value)
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline float getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( Vector3 vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // 
+    inline const Point3 operator *( Point3 pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Transform3 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( float radians, Vector3 unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( Quat unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( Vector3 scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( Vector3 translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_soa.h
index 71843ef14..c37927b38 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_soa.h
@@ -1,1921 +1,1921 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_CPP_SPU_H
-#define _VECTORMATH_SOA_CPP_SPU_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "floatInVec.h"
-#include "boolInVec.h"
-#include "vectormath_aos.h"
-#include <stdio.h>
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-namespace Vectormath {
-
-namespace Soa {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A set of four 3-D vectors in structure-of-arrays format
-//
-class Vector3
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Copy a 3-D vector
-    // 
-    inline Vector3( const Vector3 & vec );
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( vec_float4 x, vec_float4 y, vec_float4 z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( const Point3 & pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( vec_float4 scalar );
-
-    // Replicate an AoS 3-D vector
-    // 
-    inline Vector3( Aos::Vector3 vec );
-
-    // Insert four AoS 3-D vectors
-    // 
-    inline Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 );
-
-    // Extract four AoS 3-D vectors
-    // 
-    inline void get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( const Vector3 & vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( vec_float4 x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( vec_float4 y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( vec_float4 z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( const Vector3 & vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( const Point3 & pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( vec_float4 scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( const Vector3 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( const Vector3 & vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( const Vector3 & vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( const Vector3 & vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( const Vector3 & vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline vec_float4 maxElem( const Vector3 & vec );
-
-// Minimum element of a 3-D vector
-// 
-inline vec_float4 minElem( const Vector3 & vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline vec_float4 sum( const Vector3 & vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline vec_float4 lengthSqr( const Vector3 & vec );
-
-// Compute the length of a 3-D vector
-// 
-inline vec_float4 length( const Vector3 & vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( const Vector3 & vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// 
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( const Vector3 & vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads );
-
-// Store four slots of an SoA 3-D vector in three quadwords
-// 
-inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads );
-
-// Store eight slots of two SoA 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec, const char * name );
-
-#endif
-
-// A set of four 4-D vectors in structure-of-arrays format
-//
-class Vector4
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-    vec_float4 mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Copy a 4-D vector
-    // 
-    inline Vector4( const Vector4 & vec );
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( const Vector3 & xyz, vec_float4 w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( const Vector3 & vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( const Point3 & pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( const Quat & quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( vec_float4 scalar );
-
-    // Replicate an AoS 4-D vector
-    // 
-    inline Vector4( Aos::Vector4 vec );
-
-    // Insert four AoS 4-D vectors
-    // 
-    inline Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 );
-
-    // Extract four AoS 4-D vectors
-    // 
-    inline void get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( const Vector4 & vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( vec_float4 x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( vec_float4 y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( vec_float4 z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( vec_float4 w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline vec_float4 getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( const Vector4 & vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( const Vector4 & vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( vec_float4 scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( const Vector4 & vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( const Vector4 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( const Vector4 & vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( const Vector4 & vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( const Vector4 & vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( const Vector4 & vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline vec_float4 maxElem( const Vector4 & vec );
-
-// Minimum element of a 4-D vector
-// 
-inline vec_float4 minElem( const Vector4 & vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline vec_float4 sum( const Vector4 & vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline vec_float4 lengthSqr( const Vector4 & vec );
-
-// Compute the length of a 4-D vector
-// 
-inline vec_float4 length( const Vector4 & vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( const Vector4 & vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 );
-
-// Store four slots of an SoA 4-D vector as half-floats
-// 
-inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec, const char * name );
-
-#endif
-
-// A set of four 3-D points in structure-of-arrays format
-//
-class Point3
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Copy a 3-D point
-    // 
-    inline Point3( const Point3 & pnt );
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( vec_float4 x, vec_float4 y, vec_float4 z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( const Vector3 & vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( vec_float4 scalar );
-
-    // Replicate an AoS 3-D point
-    // 
-    inline Point3( Aos::Point3 pnt );
-
-    // Insert four AoS 3-D points
-    // 
-    inline Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 );
-
-    // Extract four AoS 3-D points
-    // 
-    inline void get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( const Point3 & pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( vec_float4 x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( vec_float4 y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( vec_float4 z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( const Point3 & pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( const Vector3 & vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( const Vector3 & vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( const Point3 & pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( const Point3 & pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( const Point3 & pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( const Point3 & pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline vec_float4 maxElem( const Point3 & pnt );
-
-// Minimum element of a 3-D point
-// 
-inline vec_float4 minElem( const Point3 & pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline vec_float4 sum( const Point3 & pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline vec_float4 distSqrFromOrigin( const Point3 & pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline vec_float4 distFromOrigin( const Point3 & pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt, const vec_float4 * threeQuads );
-
-// Store four slots of an SoA 3-D point in three quadwords
-// 
-inline void storeXYZArray( const Point3 & pnt, vec_float4 * threeQuads );
-
-// Store eight slots of two SoA 3-D points as half-floats
-// 
-inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt, const char * name );
-
-#endif
-
-// A set of four quaternions in structure-of-arrays format
-//
-class Quat
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-    vec_float4 mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Copy a quaternion
-    // 
-    inline Quat( const Quat & quat );
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( const Vector3 & xyz, vec_float4 w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( const Vector4 & vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( vec_float4 scalar );
-
-    // Replicate an AoS quaternion
-    // 
-    inline Quat( Aos::Quat quat );
-
-    // Insert four AoS quaternions
-    // 
-    inline Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 );
-
-    // Extract four AoS quaternions
-    // 
-    inline void get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( const Quat & quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( vec_float4 x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( vec_float4 y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( vec_float4 z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( vec_float4 w );
-
-    // Get the x element of a quaternion
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline vec_float4 getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( const Quat & quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( const Quat & quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( const Quat & quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( vec_float4 scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( const Quat & quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( vec_float4 radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( vec_float4 radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( vec_float4 radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( vec_float4 scalar, const Quat & quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( const Quat & quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
-
-// Compute the dot product of two quaternions
-// 
-inline vec_float4 dot( const Quat & quat0, const Quat & quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline vec_float4 norm( const Quat & quat );
-
-// Compute the length of a quaternion
-// 
-inline vec_float4 length( const Quat & quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( const Quat & quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat, const char * name );
-
-#endif
-
-// A set of four 3x3 matrices in structure-of-arrays format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( const Quat & unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( vec_float4 scalar );
-
-    // Replicate an AoS 3x3 matrix
-    // 
-    inline Matrix3( const Aos::Matrix3 & mat );
-
-    // Insert four AoS 3x3 matrices
-    // 
-    inline Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 );
-
-    // Extract four AoS 3x3 matrices
-    // 
-    inline void get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const;
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( const Vector3 & col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, const Vector3 & vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( vec_float4 scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( const Vector3 & scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline vec_float4 determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A set of four 4x4 matrices in structure-of-arrays format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( vec_float4 scalar );
-
-    // Replicate an AoS 4x4 matrix
-    // 
-    inline Matrix4( const Aos::Matrix4 & mat );
-
-    // Insert four AoS 4x4 matrices
-    // 
-    inline Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 );
-
-    // Extract four AoS 4x4 matrices
-    // 
-    inline void get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const;
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( const Vector4 & col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( const Vector4 & col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( const Vector4 & col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( const Vector4 & col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, const Vector4 & vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( vec_float4 scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( const Vector4 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( const Quat & unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( const Vector3 & scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( const Vector3 & translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline vec_float4 determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A set of four 3x4 transformation matrices in structure-of-arrays format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( vec_float4 scalar );
-
-    // Replicate an AoS 3x4 transformation matrix
-    // 
-    inline Transform3( const Aos::Transform3 & tfrm );
-
-    // Insert four AoS 3x4 transformation matrices
-    // 
-    inline Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 );
-
-    // Extract four AoS 3x4 transformation matrices
-    // 
-    inline void get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const;
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( const Vector3 & col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( const Vector3 & col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( const Vector3 & scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( const Vector3 & translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#include "vec_soa.h"
-#include "quat_soa.h"
-#include "mat_soa.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_CPP_SPU_H
+#define _VECTORMATH_SOA_CPP_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include "floatInVec.h"
+#include "boolInVec.h"
+#include "vectormath_aos.h"
+#include <stdio.h>
+// NOTE(review): <stdio.h> above is included unconditionally, while the _VECTORMATH_DEBUG block below is empty.
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+namespace Vectormath {
+
+namespace Soa {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+// Needed because the classes below reference one another (e.g. Vector3 declares a constructor from Point3).
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A set of four 3-D vectors in structure-of-arrays format
+// Each component (x, y, z) is held in its own vec_float4 member: mX, mY, mZ.
+class Vector3
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mX, mY, and mZ are not assigned here.
+    inline Vector3( ) { };
+
+    // Copy a 3-D vector
+    // 
+    inline Vector3( const Vector3 & vec );
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( vec_float4 x, vec_float4 y, vec_float4 z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Declared explicit, so a Point3 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( const Point3 & pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Declared explicit, so a bare vec_float4 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( vec_float4 scalar );
+
+    // Replicate an AoS 3-D vector
+    // Not declared explicit, so an Aos::Vector3 converts implicitly; the argument is taken by value.
+    inline Vector3( Aos::Vector3 vec );
+
+    // Insert four AoS 3-D vectors
+    // 
+    inline Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 );
+
+    // Extract four AoS 3-D vectors
+    // The results are written through the four non-const reference parameters.
+    inline void get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const;
+
+    // Assign one 3-D vector to another
+    // 
+    inline Vector3 & operator =( const Vector3 & vec );
+
+    // Set the x element of a 3-D vector
+    // Returns a Vector3 reference (presumably *this, for chaining -- confirm in the implementation).
+    inline Vector3 & setX( vec_float4 x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( vec_float4 y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( vec_float4 z );
+
+    // Get the x element of a 3-D vector
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): idx is presumably 0, 1, or 2 (x, y, z); no range check is visible here -- confirm.
+    inline Vector3 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // 
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference (vec_float4_t &), so the result can be assigned to.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    inline const Vector3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( const Vector3 & vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3, not Vector3.
+    inline const Point3 operator +( const Point3 & pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( vec_float4 scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Vector3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( const Vector3 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a 3-D vector
+    // 
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // Static member; call as Vector3::xAxis( ).
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // Static member; call as Vector3::yAxis( ).
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // Static member; call as Vector3::zAxis( ).
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// Non-member overload that takes the scalar as the left-hand operand.
+inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+// NOTE(review): which argument supplies the sign is not visible from this declaration -- confirm.
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+// 
+inline vec_float4 maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+// 
+inline vec_float4 minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline vec_float4 sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline vec_float4 lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+// 
+inline vec_float4 length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+// The result is a Matrix3.
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// 
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// The parameter names (unitVec0, unitVec1) indicate that unit-length inputs are expected.
+inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably slots with the mask set take vec1 -- confirm.
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// vec is the output (non-const reference); threeQuads is only read (pointer to const).
+inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads );
+
+// Store four slots of an SoA 3-D vector in three quadwords
+// vec is only read (const reference); the output is written through threeQuads.
+inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads );
+
+// Store eight slots of two SoA 3-D vectors as half-floats
+// NOTE(review): output goes through threeQuads; the required buffer size is not visible here -- confirm.
+inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A set of four 4-D vectors in structure-of-arrays format
+// Each component (x, y, z, w) is held in its own vec_float4 member: mX, mY, mZ, mW.
+class Vector4
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+    vec_float4 mW;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mX, mY, mZ, and mW are not assigned here.
+    inline Vector4( ) { };
+
+    // Copy a 4-D vector
+    // 
+    inline Vector4( const Vector4 & vec );
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // 
+    inline Vector4( const Vector3 & xyz, vec_float4 w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Declared explicit, so a Vector3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Vector3 & vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Declared explicit, so a Point3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Point3 & pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Declared explicit, so a Quat is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Quat & quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Declared explicit, so a bare vec_float4 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( vec_float4 scalar );
+
+    // Replicate an AoS 4-D vector
+    // Not declared explicit, so an Aos::Vector4 converts implicitly; the argument is taken by value.
+    inline Vector4( Aos::Vector4 vec );
+
+    // Insert four AoS 4-D vectors
+    // 
+    inline Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 );
+
+    // Extract four AoS 4-D vectors
+    // The results are written through the four non-const reference parameters.
+    inline void get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const;
+
+    // Assign one 4-D vector to another
+    // 
+    inline Vector4 & operator =( const Vector4 & vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // The elements are returned together as a Vector3.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // Returns a Vector4 reference (presumably *this, for chaining -- confirm in the implementation).
+    inline Vector4 & setX( vec_float4 x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( vec_float4 y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( vec_float4 z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( vec_float4 w );
+
+    // Get the x element of a 4-D vector
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline vec_float4 getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): idx is presumably 0 through 3 (x, y, z, w); no range check is visible here -- confirm.
+    inline Vector4 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // 
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference (vec_float4_t &), so the result can be assigned to.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    inline const Vector4 operator +( const Vector4 & vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( const Vector4 & vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator *( vec_float4 scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    inline Vector4 & operator +=( const Vector4 & vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( const Vector4 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a 4-D vector
+    // 
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // Static member; call as Vector4::xAxis( ).
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // Static member; call as Vector4::yAxis( ).
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // Static member; call as Vector4::zAxis( ).
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // Static member; call as Vector4::wAxis( ).
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// Non-member overload that takes the scalar as the left-hand operand.
+inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+// NOTE(review): which argument supplies the sign is not visible from this declaration -- confirm.
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+// 
+inline vec_float4 maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+// 
+inline vec_float4 minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline vec_float4 sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline vec_float4 lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+// 
+inline vec_float4 length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+// The result is a Matrix4.
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// The parameter names (unitVec0, unitVec1) indicate that unit-length inputs are expected.
+inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably slots with the mask set take vec1 -- confirm.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 );
+
+// Store four slots of an SoA 4-D vector as half-floats
+// NOTE(review): output goes through twoQuads; the required buffer size is not visible here -- confirm.
+inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A set of four 3-D points in structure-of-arrays format
+// Each component (x, y, z) is held in its own vec_float4 member: mX, mY, mZ.
+class Point3
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mX, mY, and mZ are not assigned here.
+    inline Point3( ) { };
+
+    // Copy a 3-D point
+    // 
+    inline Point3( const Point3 & pnt );
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( vec_float4 x, vec_float4 y, vec_float4 z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit, so a Vector3 is never converted to a Point3 implicitly.
+    explicit inline Point3( const Vector3 & vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit, so a bare vec_float4 is never converted to a Point3 implicitly.
+    explicit inline Point3( vec_float4 scalar );
+
+    // Replicate an AoS 3-D point
+    // Not declared explicit, so an Aos::Point3 converts implicitly; the argument is taken by value.
+    inline Point3( Aos::Point3 pnt );
+
+    // Insert four AoS 3-D points
+    // 
+    inline Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 );
+
+    // Extract four AoS 3-D points
+    // The results are written through the four non-const reference parameters.
+    inline void get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const;
+
+    // Assign one 3-D point to another
+    // 
+    inline Point3 & operator =( const Point3 & pnt );
+
+    // Set the x element of a 3-D point
+    // Returns a Point3 reference (presumably *this, for chaining -- confirm in the implementation).
+    inline Point3 & setX( vec_float4 x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( vec_float4 y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( vec_float4 z );
+
+    // Get the x element of a 3-D point
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): idx is presumably 0, 1, or 2 (x, y, z); no range check is visible here -- confirm.
+    inline Point3 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // 
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference (vec_float4_t &), so the result can be assigned to.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result type is Vector3, not Point3.
+    inline const Vector3 operator -( const Point3 & pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // The result type is Point3.
+    inline const Point3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result type is Point3.
+    inline const Point3 operator -( const Vector3 & vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Point3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( const Vector3 & vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+// NOTE(review): which argument supplies the sign is not visible from this declaration -- confirm.
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+// 
+inline vec_float4 maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+// 
+inline vec_float4 minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline vec_float4 sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+// Overload taking a single vec_float4 scale value.
+inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// Overload taking a Vector3 of scale factors.
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline vec_float4 distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline vec_float4 distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably slots with the mask set take pnt1 -- confirm.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 );
+
+// Load four three-float 3-D points, stored in three quadwords
+// pnt is the output (non-const reference); threeQuads is only read (pointer to const).
+inline void loadXYZArray( Point3 & pnt, const vec_float4 * threeQuads );
+
+// Store four slots of an SoA 3-D point in three quadwords
+// pnt is only read (const reference); the output is written through threeQuads.
+inline void storeXYZArray( const Point3 & pnt, vec_float4 * threeQuads );
+
+// Store eight slots of two SoA 3-D points as half-floats
+// NOTE(review): output goes through threeQuads; the required buffer size is not visible here -- confirm.
+inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A set of four quaternions in structure-of-arrays format
+//
+class Quat
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+    vec_float4 mW;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Quat( ) { };
+
+    // Copy a quaternion
+    // 
+    inline Quat( const Quat & quat );
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // 
+    inline Quat( const Vector3 & xyz, vec_float4 w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // 
+    explicit inline Quat( const Vector4 & vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // 
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // 
+    explicit inline Quat( vec_float4 scalar );
+
+    // Replicate an AoS quaternion
+    // 
+    inline Quat( Aos::Quat quat );
+
+    // Insert four AoS quaternions
+    // 
+    inline Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 );
+
+    // Extract four AoS quaternions
+    // 
+    inline void get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const;
+
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( const Quat & quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // 
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( vec_float4 x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( vec_float4 y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( vec_float4 z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( vec_float4 w );
+
+    // Get the x element of a quaternion
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline vec_float4 getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // 
+    inline Quat & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // 
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // 
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( const Quat & quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( const Quat & quat ) const;
+
+    // Multiply two quaternions
+    // 
+    inline const Quat operator *( const Quat & quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( vec_float4 scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( const Quat & quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // 
+    static inline const Quat rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( vec_float4 radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( vec_float4 radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( vec_float4 radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// 
+inline const Quat operator *( vec_float4 scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+// 
+inline vec_float4 dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+// 
+inline vec_float4 norm( const Quat & quat );
+
+// Compute the length of a quaternion
+// 
+inline vec_float4 length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A set of four 3x3 matrices in structure-of-arrays format
+//
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // 
+    explicit inline Matrix3( const Quat & unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // 
+    explicit inline Matrix3( vec_float4 scalar );
+
+    // Replicate an AoS 3x3 matrix
+    // 
+    inline Matrix3( const Aos::Matrix3 & mat );
+
+    // Insert four AoS 3x3 matrices
+    // 
+    inline Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 );
+
+    // Extract four AoS 3x3 matrices
+    // 
+    inline void get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const;
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol2( const Vector3 & col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // 
+    inline Matrix3 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // 
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( vec_float4 scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix3 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // 
+    static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// 
+inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Compute the transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Compute the determinant of a 3x3 matrix
+// 
+inline vec_float4 determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A set of four 4x4 matrices in structure-of-arrays format
+//
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // 
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // 
+    explicit inline Matrix4( vec_float4 scalar );
+
+    // Replicate an AoS 4x4 matrix
+    // 
+    inline Matrix4( const Aos::Matrix4 & mat );
+
+    // Insert four AoS 4x4 matrices
+    // 
+    inline Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 );
+
+    // Extract four AoS 4x4 matrices
+    // 
+    inline void get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const;
+
+    // Assign one 4x4 matrix to another
+    // 
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix of a 4x4 matrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set the translation component of a 4x4 matrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol0( const Vector4 & col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol1( const Vector4 & col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol2( const Vector4 & col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol3( const Vector4 & col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // 
+    inline Matrix4 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // 
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( vec_float4 scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    inline const Vector4 operator *( const Vector4 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // 
+    inline const Vector4 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // 
+    inline const Vector4 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // 
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // 
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix4 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 4x4 rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix4 rotation( const Quat & unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( const Vector3 & translateVec );
+
+    // Construct a viewing matrix based on eye position, position looked at, and up direction
+    // 
+    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+    // Construct a perspective projection matrix
+    // 
+    static inline const Matrix4 perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static inline const Matrix4 frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static inline const Matrix4 orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// 
+inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Compute the transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// 
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Compute the determinant of a 4x4 matrix
+// 
+inline vec_float4 determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A set of four 3x4 transformation matrices in structure-of-arrays format
+//
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // 
+    explicit inline Transform3( vec_float4 scalar );
+
+    // Replicate an AoS 3x4 transformation matrix
+    // 
+    inline Transform3( const Aos::Transform3 & tfrm );
+
+    // Insert four AoS 3x4 transformation matrices
+    // 
+    inline Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 );
+
+    // Extract four AoS 3x4 transformation matrices
+    // 
+    inline void get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const;
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set the translation component of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( const Vector3 & col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( const Vector3 & col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline Transform3 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // 
+    inline const Point3 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // 
+    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( const Vector3 & scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#include "vec_soa.h"
+#include "quat_soa.h"
+#include "mat_soa.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/tests/clean.pl b/Extras/vectormathlibrary/tests/clean.pl
index ec425a2c4..412f596cc 100644
--- a/Extras/vectormathlibrary/tests/clean.pl
+++ b/Extras/vectormathlibrary/tests/clean.pl
@@ -1,109 +1,109 @@
-#!/usr/bin/perl
-
-#
-#  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-#  All rights reserved.
-#
-#  Redistribution and use in source and binary forms,
-#  with or without modification, are permitted provided that the
-#  following conditions are met:
-#   * Redistributions of source code must retain the above copyright
-#     notice, this list of conditions and the following disclaimer.
-#   * Redistributions in binary form must reproduce the above copyright
-#     notice, this list of conditions and the following disclaimer in the
-#     documentation and/or other materials provided with the distribution.
-#   * Neither the name of the Sony Computer Entertainment Inc nor the names
-#     of its contributors may be used to endorse or promote products derived
-#     from this software without specific prior written permission.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#  POSSIBILITY OF SUCH DAMAGE.
-#
-
-$lineno = 0;
-
-sub getLine
-{
-   local( $line );
-   
-   $line = <STDIN>;
-
-   while( $line =~ m/^lv2\([^\)]*\)\:$/ )
-   {
-       $line = <STDIN>;
-   }
-
-   $line =~ s/^lv2\([^\)]*\)\: //;
-
-   return $line;
-}
-
-while(($line = <STDIN>) !~ m/__begin__/)
-{
-}
-
-$countSlotLines = 0;
-
-while( $line = &getLine )
-{
-   $lineno++;
-   
-   if ( $line =~ m/__end__/ )
-   {
-      exit;
-   }
-   
-   # if soa print, only save first slot
-
-   if ( $line =~ m/^slot ([1-3])/ )
-   {
-      while ( $line =~ m/^slot [1-3]/ )
-      {
-         # skip all lines for this slot
-
-         for ( $i = 0; $i < $slotLines; $i++ )
-         {
-            $line = &getLine;
-         }
-
-         # get next line
-
-         $line = &getLine;
-      }
-
-      # stop counting slot lines
-
-      $countSlotLines = 0;
-   }
-   elsif ( $countSlotLines )
-   {
-      $slotLines++;
-   }
-
-   if ( $line =~ m/^slot 0\:(.?)/ )
-   {
-      $countSlotLines = 1;
-
-      if ( $1 eq ' ' )
-      {
-         $line =~ s/^slot 0\: //;
-         $slotLines = 0;
-      }
-      else
-      {
-         $line = &getLine;
-         $slotLines = 1;
-      }
-   }
-
-   print $line;
-}
+#!/usr/bin/perl
+
+#
+#  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms,
+#  with or without modification, are permitted provided that the
+#  following conditions are met:
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#   * Neither the name of the Sony Computer Entertainment Inc nor the names
+#     of its contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#  POSSIBILITY OF SUCH DAMAGE.
+#
+
+$lineno = 0;
+
+sub getLine
+{
+   local( $line );
+   
+   $line = <STDIN>;
+
+   while( $line =~ m/^lv2\([^\)]*\)\:$/ )
+   {
+       $line = <STDIN>;
+   }
+
+   $line =~ s/^lv2\([^\)]*\)\: //;
+
+   return $line;
+}
+
+# Discard input up to and including the line containing __begin__.  The defined()
+# test ends the scan at EOF, so input that lacks the marker cannot loop forever.
+1 while( defined( $line = <STDIN> ) && $line !~ m/__begin__/ );
+
+$countSlotLines = 0;
+
+while( $line = &getLine )
+{
+   $lineno++;
+   
+   if ( $line =~ m/__end__/ )
+   {
+      exit;
+   }
+   
+   # if soa print, only save first slot
+
+   if ( $line =~ m/^slot ([1-3])/ )
+   {
+      while ( $line =~ m/^slot [1-3]/ )
+      {
+         # skip all lines for this slot
+
+         for ( $i = 0; $i < $slotLines; $i++ )
+         {
+            $line = &getLine;
+         }
+
+         # get next line
+
+         $line = &getLine;
+      }
+
+      # stop counting slot lines
+
+      $countSlotLines = 0;
+   }
+   elsif ( $countSlotLines )
+   {
+      $slotLines++;
+   }
+
+   if ( $line =~ m/^slot 0\:(.?)/ )
+   {
+      $countSlotLines = 1;
+
+      if ( $1 eq ' ' )
+      {
+         $line =~ s/^slot 0\: //;
+         $slotLines = 0;
+      }
+      else
+      {
+         $line = &getLine;
+         $slotLines = 1;
+      }
+   }
+
+   print $line;
+}
diff --git a/Extras/vectormathlibrary/tests/compare.pl b/Extras/vectormathlibrary/tests/compare.pl
index f9fe1ca50..ed4c929a3 100644
--- a/Extras/vectormathlibrary/tests/compare.pl
+++ b/Extras/vectormathlibrary/tests/compare.pl
@@ -1,95 +1,95 @@
-#!/usr/bin/perl
-
-#
-#  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-#  All rights reserved.
-#
-#  Redistribution and use in source and binary forms,
-#  with or without modification, are permitted provided that the
-#  following conditions are met:
-#   * Redistributions of source code must retain the above copyright
-#     notice, this list of conditions and the following disclaimer.
-#   * Redistributions in binary form must reproduce the above copyright
-#     notice, this list of conditions and the following disclaimer in the
-#     documentation and/or other materials provided with the distribution.
-#   * Neither the name of the Sony Computer Entertainment Inc nor the names
-#     of its contributors may be used to endorse or promote products derived
-#     from this software without specific prior written permission.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#  POSSIBILITY OF SUCH DAMAGE.
-#
-
-$file1 = $ARGV[0];
-$file2 = $ARGV[1];
-
-if (!open(FILE1, "<$file1"))
-{
-    print "Couldn't open $file1\n";
-    exit;
-}
-
-if (!open(FILE2, "<$file2"))
-{
-    print "Couldn't open $file2\n";
-    exit;
-}
-
-print "Comparing $file1 $file2\n";
-
-$lineno1 = 0;
-$lineno2 = 0;
-
-while(($line1 = <FILE1>) && ($line2 = <FILE2>))
-{
-   $lineno1++;
-   $lineno2++;
-   
-   if ( $line1 =~ m/\:$/ )
-   {
-      $line1 = <FILE1>;
-      $lineno1++;
-   }
-   
-   if ( $line2 =~ m/\:$/ )
-   {
-      $line2 = <FILE2>;
-      $lineno2++;
-   }
-
-   $line1 =~ s/^.*\: //g;
-   $line2 =~ s/^.*\: //g;
-
-   @words1 = split(/ /,$line1);
-   @words2 = split(/ /,$line2);
-   
-   for ($i = 0; $i < @words1; $i++)
-   {
-      $word1 = $words1[$i];
-      $word2 = $words2[$i];
-      
-      $word1 =~ s/\s//g;
-      $word2 =~ s/\s//g;
-      
-      if ( $word1 ne $word2 )
-      {
-         $error = abs($word1 - $word2);
-
-         $limit = abs(1e-4 * $word1);
-         
-         if ( $error > $limit && !( abs($word1) < 1e-4 && $error < 1e-4 ) )
-         {
-            print "$lineno1: $word1 $lineno2: $word2\n";
-         }
-      }
-   }
-}
+#!/usr/bin/perl
+
+#
+#  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms,
+#  with or without modification, are permitted provided that the
+#  following conditions are met:
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#   * Neither the name of the Sony Computer Entertainment Inc nor the names
+#     of its contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#  POSSIBILITY OF SUCH DAMAGE.
+#
+
+$file1 = $ARGV[0];
+$file2 = $ARGV[1];
+
+if (!open(FILE1, "<$file1"))
+{
+    print "Couldn't open $file1\n";
+    exit;
+}
+
+if (!open(FILE2, "<$file2"))
+{
+    print "Couldn't open $file2\n";
+    exit;
+}
+
+print "Comparing $file1 $file2\n";
+
+$lineno1 = 0;
+$lineno2 = 0;
+
+while(($line1 = <FILE1>) && ($line2 = <FILE2>))
+{
+   $lineno1++;
+   $lineno2++;
+   # A line that ends in ':' carries no values here: step to the line that follows it.
+   if ( $line1 =~ m/\:$/ )
+   {
+      $line1 = <FILE1>;
+      $lineno1++;
+   }
+   
+   if ( $line2 =~ m/\:$/ )
+   {
+      $line2 = <FILE2>;
+      $lineno2++;
+   }
+   # Strip everything up to and including the last ': ' so only the values remain.
+   $line1 =~ s/^.*\: //g;
+   $line2 =~ s/^.*\: //g;
+
+   @words1 = split(/ /,$line1);
+   @words2 = split(/ /,$line2);
+   # Walk the longer of the two lists so values present in only one file are compared too.
+   for ($i = 0; $i < @words1 || $i < @words2; $i++)
+   {
+      $word1 = $words1[$i];
+      $word2 = $words2[$i];
+      
+      $word1 =~ s/\s//g;
+      $word2 =~ s/\s//g;
+      
+      if ( $word1 ne $word2 )
+      {
+         $error = abs($word1 - $word2);
+
+         $limit = abs(1e-4 * $word1);
+         # Report when the error exceeds 1e-4 of the first value, unless that value and the error are both below 1e-4.
+         if ( $error > $limit && !( abs($word1) < 1e-4 && $error < 1e-4 ) )
+         {
+            print "$lineno1: $word1 $lineno2: $word2\n";
+         }
+      }
+   }
+}
diff --git a/Extras/vectormathlibrary/vectormath.spec b/Extras/vectormathlibrary/vectormath.spec
index 2d739c317..9507485c1 100644
--- a/Extras/vectormathlibrary/vectormath.spec
+++ b/Extras/vectormathlibrary/vectormath.spec
@@ -1,81 +1,81 @@
-%define lib_version 1.0.1
-
-Summary: Vector math library
-Name: vectormath
-Version: %{lib_version}
-Release: 1
-License: BSD
-Group: Development/Libraries
-Source0: %{name}-%{version}.tar.gz
-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
-
-%description
-Vector math library.
-
-%ifarch ppc ppc64
-%define _lib_arch ppu
-%endif
-
-%ifarch i386 x86_64
-%define _lib_arch SSE
-%endif
-
-%if %{undefined _lib_arch}
-%define _lib_arch scalar
-%endif
-
-%package -n %{name}-devel
-Summary: Vector math library.
-Group: Development/Libraries
-%ifarch ppc ppc64
-Requires: simdmath-devel
-%endif
-
-%description -n %{name}-devel
-Vector math library.
-
-%ifarch ppc ppc64
-%package -n spu-%{name}-devel
-Summary: Vector math library.
-Group: Development/Libraries
-Requires: spu-simdmath-devel
-
-%description -n spu-%{name}-devel
-Vector math library.
-%endif
-
-%prep
-%setup -q
-
-%build
-
-%install
-rm -rf %{buildroot}
-
-make ARCH=%{_lib_arch} DESTDIR=%{buildroot} install
-
-%ifarch ppc ppc64
-make ARCH=spu DESTDIR=%{buildroot} install
-%endif
-
-mkdir -p %{buildroot}/%{_docdir}/%{name}-%{version}
-cp README LICENSE doc/*.pdf %{buildroot}/%{_docdir}/%{name}-%{version}/
-
-
-%clean
-rm -rf %{buildroot}
-
-%files -n %{name}-devel
-%defattr(-,root,root,-)
-%{_includedir}/*
-%{_docdir}/*
-
-%ifarch ppc ppc64
-%files -n spu-%{name}-devel
-%defattr(-,root,root,-)
-%{_prefix}/spu/include/*
-%endif
-
-%changelog
-* Wed Aug  8 2007 Kazunori Asayama <asayama@sm.sony.co.jp> - 1.0.1-1
-- Initial build.
+%define lib_version 1.0.1
+
+Summary: Vector math library
+Name: vectormath
+Version: %{lib_version}
+Release: 1
+License: BSD
+Group: Development/Libraries
+Source0: %{name}-%{version}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
+
+%description
+Vector math library.
+
+%ifarch ppc ppc64
+%define _lib_arch ppu
+%endif
+# Any 32-bit x86 build arch (i386, i486, i586, i686 and the rest of the ix86 macro) or x86_64 selects SSE.
+%ifarch %{ix86} x86_64
+%define _lib_arch SSE
+%endif
+
+%if %{undefined _lib_arch}
+%define _lib_arch scalar
+%endif
+
+%package -n %{name}-devel
+Summary: Vector math library.
+Group: Development/Libraries
+%ifarch ppc ppc64
+Requires: simdmath-devel
+%endif
+
+%description -n %{name}-devel
+Vector math library.
+
+%ifarch ppc ppc64
+%package -n spu-%{name}-devel
+Summary: Vector math library.
+Group: Development/Libraries
+Requires: spu-simdmath-devel
+
+%description -n spu-%{name}-devel
+Vector math library.
+%endif
+
+%prep
+%setup -q
+
+%build
+
+%install
+rm -rf %{buildroot}
+
+make ARCH=%{_lib_arch} DESTDIR=%{buildroot} install
+
+%ifarch ppc ppc64
+make ARCH=spu DESTDIR=%{buildroot} install
+%endif
+
+mkdir -p %{buildroot}/%{_docdir}/%{name}-%{version}
+cp README LICENSE doc/*.pdf %{buildroot}/%{_docdir}/%{name}-%{version}/
+
+
+%clean
+rm -rf %{buildroot}
+
+%files -n %{name}-devel
+%defattr(-,root,root,-)
+%{_includedir}/*
+%{_docdir}/*
+
+%ifarch ppc ppc64
+%files -n spu-%{name}-devel
+%defattr(-,root,root,-)
+%{_prefix}/spu/include/*
+%endif
+
+%changelog
+* Wed Aug  8 2007 Kazunori Asayama <asayama@sm.sony.co.jp> - 1.0.1-1
+- Initial build.