Add preliminary PhysX 4.0 backend for PyBullet
Add inverse dynamics / mass matrix code from DeepMimic, thanks to Xue Bin (Jason) Peng Add example how to use stable PD control for humanoid with spherical joints (see humanoidMotionCapture.py) Fix related to TinyRenderer object transforms not updating when using collision filtering
This commit is contained in:
101
examples/ThirdPartyLibs/Eigen/src/LU/Determinant.h
Normal file
101
examples/ThirdPartyLibs/Eigen/src/LU/Determinant.h
Normal file
@@ -0,0 +1,101 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_DETERMINANT_H
|
||||
#define EIGEN_DETERMINANT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived>
|
||||
inline const typename Derived::Scalar bruteforce_det3_helper
|
||||
(const MatrixBase<Derived>& matrix, int a, int b, int c)
|
||||
{
|
||||
return matrix.coeff(0,a)
|
||||
* (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b));
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
const typename Derived::Scalar bruteforce_det4_helper
|
||||
(const MatrixBase<Derived>& matrix, int j, int k, int m, int n)
|
||||
{
|
||||
return (matrix.coeff(j,0) * matrix.coeff(k,1) - matrix.coeff(k,0) * matrix.coeff(j,1))
|
||||
* (matrix.coeff(m,2) * matrix.coeff(n,3) - matrix.coeff(n,2) * matrix.coeff(m,3));
|
||||
}
|
||||
|
||||
template<typename Derived,
|
||||
int DeterminantType = Derived::RowsAtCompileTime
|
||||
> struct determinant_impl
|
||||
{
|
||||
static inline typename traits<Derived>::Scalar run(const Derived& m)
|
||||
{
|
||||
if(Derived::ColsAtCompileTime==Dynamic && m.rows()==0)
|
||||
return typename traits<Derived>::Scalar(1);
|
||||
return m.partialPivLu().determinant();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived> struct determinant_impl<Derived, 1>
|
||||
{
|
||||
static inline typename traits<Derived>::Scalar run(const Derived& m)
|
||||
{
|
||||
return m.coeff(0,0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived> struct determinant_impl<Derived, 2>
|
||||
{
|
||||
static inline typename traits<Derived>::Scalar run(const Derived& m)
|
||||
{
|
||||
return m.coeff(0,0) * m.coeff(1,1) - m.coeff(1,0) * m.coeff(0,1);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived> struct determinant_impl<Derived, 3>
|
||||
{
|
||||
static inline typename traits<Derived>::Scalar run(const Derived& m)
|
||||
{
|
||||
return bruteforce_det3_helper(m,0,1,2)
|
||||
- bruteforce_det3_helper(m,1,0,2)
|
||||
+ bruteforce_det3_helper(m,2,0,1);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived> struct determinant_impl<Derived, 4>
|
||||
{
|
||||
static typename traits<Derived>::Scalar run(const Derived& m)
|
||||
{
|
||||
// trick by Martin Costabel to compute 4x4 det with only 30 muls
|
||||
return bruteforce_det4_helper(m,0,1,2,3)
|
||||
- bruteforce_det4_helper(m,0,2,1,3)
|
||||
+ bruteforce_det4_helper(m,0,3,1,2)
|
||||
+ bruteforce_det4_helper(m,1,2,0,3)
|
||||
- bruteforce_det4_helper(m,1,3,0,2)
|
||||
+ bruteforce_det4_helper(m,2,3,0,1);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \lu_module
|
||||
*
|
||||
* \returns the determinant of this matrix
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
|
||||
{
|
||||
eigen_assert(rows() == cols());
|
||||
typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested;
|
||||
return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_DETERMINANT_H
|
||||
889
examples/ThirdPartyLibs/Eigen/src/LU/FullPivLU.h
Normal file
889
examples/ThirdPartyLibs/Eigen/src/LU/FullPivLU.h
Normal file
@@ -0,0 +1,889 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_LU_H
|
||||
#define EIGEN_LU_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
|
||||
: traits<_MatrixType>
|
||||
{
|
||||
typedef MatrixXpr XprKind;
|
||||
typedef SolverStorage StorageKind;
|
||||
enum { Flags = 0 };
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \ingroup LU_Module
|
||||
*
|
||||
* \class FullPivLU
|
||||
*
|
||||
* \brief LU decomposition of a matrix with complete pivoting, and related features
|
||||
*
|
||||
* \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
|
||||
*
|
||||
* This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is
|
||||
* decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is
|
||||
* upper-triangular, and P and Q are permutation matrices. This is a rank-revealing LU
|
||||
* decomposition. The eigenvalues (diagonal coefficients) of U are sorted in such a way that any
|
||||
* zeros are at the end.
|
||||
*
|
||||
* This decomposition provides the generic approach to solving systems of linear equations, computing
|
||||
* the rank, invertibility, inverse, kernel, and determinant.
|
||||
*
|
||||
* This LU decomposition is very stable and well tested with large matrices. However there are use cases where the SVD
|
||||
* decomposition is inherently more stable and/or flexible. For example, when computing the kernel of a matrix,
|
||||
* working with the SVD allows to select the smallest singular values of the matrix, something that
|
||||
* the LU decomposition doesn't see.
|
||||
*
|
||||
* The data of the LU decomposition can be directly accessed through the methods matrixLU(),
|
||||
* permutationP(), permutationQ().
|
||||
*
|
||||
* As an exemple, here is how the original matrix can be retrieved:
|
||||
* \include class_FullPivLU.cpp
|
||||
* Output: \verbinclude class_FullPivLU.out
|
||||
*
|
||||
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
|
||||
*
|
||||
* \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse()
|
||||
*/
|
||||
template<typename _MatrixType> class FullPivLU
|
||||
: public SolverBase<FullPivLU<_MatrixType> >
|
||||
{
|
||||
public:
|
||||
typedef _MatrixType MatrixType;
|
||||
typedef SolverBase<FullPivLU> Base;
|
||||
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU)
|
||||
// FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
|
||||
enum {
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
||||
};
|
||||
typedef typename internal::plain_row_type<MatrixType, StorageIndex>::type IntRowVectorType;
|
||||
typedef typename internal::plain_col_type<MatrixType, StorageIndex>::type IntColVectorType;
|
||||
typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;
|
||||
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType;
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
/**
|
||||
* \brief Default Constructor.
|
||||
*
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LU::compute(const MatrixType&).
|
||||
*/
|
||||
FullPivLU();
|
||||
|
||||
/** \brief Default Constructor with memory preallocation
|
||||
*
|
||||
* Like the default constructor but with preallocation of the internal data
|
||||
* according to the specified problem \a size.
|
||||
* \sa FullPivLU()
|
||||
*/
|
||||
FullPivLU(Index rows, Index cols);
|
||||
|
||||
/** Constructor.
|
||||
*
|
||||
* \param matrix the matrix of which to compute the LU decomposition.
|
||||
* It is required to be nonzero.
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit FullPivLU(const EigenBase<InputType>& matrix);
|
||||
|
||||
/** \brief Constructs a LU factorization from a given matrix
|
||||
*
|
||||
* This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
|
||||
*
|
||||
* \sa FullPivLU(const EigenBase&)
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit FullPivLU(EigenBase<InputType>& matrix);
|
||||
|
||||
/** Computes the LU decomposition of the given matrix.
|
||||
*
|
||||
* \param matrix the matrix of which to compute the LU decomposition.
|
||||
* It is required to be nonzero.
|
||||
*
|
||||
* \returns a reference to *this
|
||||
*/
|
||||
template<typename InputType>
|
||||
FullPivLU& compute(const EigenBase<InputType>& matrix) {
|
||||
m_lu = matrix.derived();
|
||||
computeInPlace();
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** \returns the LU decomposition matrix: the upper-triangular part is U, the
|
||||
* unit-lower-triangular part is L (at least for square matrices; in the non-square
|
||||
* case, special care is needed, see the documentation of class FullPivLU).
|
||||
*
|
||||
* \sa matrixL(), matrixU()
|
||||
*/
|
||||
inline const MatrixType& matrixLU() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return m_lu;
|
||||
}
|
||||
|
||||
/** \returns the number of nonzero pivots in the LU decomposition.
|
||||
* Here nonzero is meant in the exact sense, not in a fuzzy sense.
|
||||
* So that notion isn't really intrinsically interesting, but it is
|
||||
* still useful when implementing algorithms.
|
||||
*
|
||||
* \sa rank()
|
||||
*/
|
||||
inline Index nonzeroPivots() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return m_nonzero_pivots;
|
||||
}
|
||||
|
||||
/** \returns the absolute value of the biggest pivot, i.e. the biggest
|
||||
* diagonal coefficient of U.
|
||||
*/
|
||||
RealScalar maxPivot() const { return m_maxpivot; }
|
||||
|
||||
/** \returns the permutation matrix P
|
||||
*
|
||||
* \sa permutationQ()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return m_p;
|
||||
}
|
||||
|
||||
/** \returns the permutation matrix Q
|
||||
*
|
||||
* \sa permutationP()
|
||||
*/
|
||||
inline const PermutationQType& permutationQ() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return m_q;
|
||||
}
|
||||
|
||||
/** \returns the kernel of the matrix, also called its null-space. The columns of the returned matrix
|
||||
* will form a basis of the kernel.
|
||||
*
|
||||
* \note If the kernel has dimension zero, then the returned matrix is a column-vector filled with zeros.
|
||||
*
|
||||
* \note This method has to determine which pivots should be considered nonzero.
|
||||
* For that, it uses the threshold value that you can control by calling
|
||||
* setThreshold(const RealScalar&).
|
||||
*
|
||||
* Example: \include FullPivLU_kernel.cpp
|
||||
* Output: \verbinclude FullPivLU_kernel.out
|
||||
*
|
||||
* \sa image()
|
||||
*/
|
||||
inline const internal::kernel_retval<FullPivLU> kernel() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return internal::kernel_retval<FullPivLU>(*this);
|
||||
}
|
||||
|
||||
/** \returns the image of the matrix, also called its column-space. The columns of the returned matrix
|
||||
* will form a basis of the image (column-space).
|
||||
*
|
||||
* \param originalMatrix the original matrix, of which *this is the LU decomposition.
|
||||
* The reason why it is needed to pass it here, is that this allows
|
||||
* a large optimization, as otherwise this method would need to reconstruct it
|
||||
* from the LU decomposition.
|
||||
*
|
||||
* \note If the image has dimension zero, then the returned matrix is a column-vector filled with zeros.
|
||||
*
|
||||
* \note This method has to determine which pivots should be considered nonzero.
|
||||
* For that, it uses the threshold value that you can control by calling
|
||||
* setThreshold(const RealScalar&).
|
||||
*
|
||||
* Example: \include FullPivLU_image.cpp
|
||||
* Output: \verbinclude FullPivLU_image.out
|
||||
*
|
||||
* \sa kernel()
|
||||
*/
|
||||
inline const internal::image_retval<FullPivLU>
|
||||
image(const MatrixType& originalMatrix) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return internal::image_retval<FullPivLU>(*this, originalMatrix);
|
||||
}
|
||||
|
||||
/** \return a solution x to the equation Ax=b, where A is the matrix of which
|
||||
* *this is the LU decomposition.
|
||||
*
|
||||
* \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
|
||||
* the only requirement in order for the equation to make sense is that
|
||||
* b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
|
||||
*
|
||||
* \returns a solution.
|
||||
*
|
||||
* \note_about_checking_solutions
|
||||
*
|
||||
* \note_about_arbitrary_choice_of_solution
|
||||
* \note_about_using_kernel_to_study_multiple_solutions
|
||||
*
|
||||
* Example: \include FullPivLU_solve.cpp
|
||||
* Output: \verbinclude FullPivLU_solve.out
|
||||
*
|
||||
* \sa TriangularView::solve(), kernel(), inverse()
|
||||
*/
|
||||
// FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
|
||||
template<typename Rhs>
|
||||
inline const Solve<FullPivLU, Rhs>
|
||||
solve(const MatrixBase<Rhs>& b) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return Solve<FullPivLU, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
/** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
|
||||
the LU decomposition.
|
||||
*/
|
||||
inline RealScalar rcond() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
|
||||
return internal::rcond_estimate_helper(m_l1_norm, *this);
|
||||
}
|
||||
|
||||
/** \returns the determinant of the matrix of which
|
||||
* *this is the LU decomposition. It has only linear complexity
|
||||
* (that is, O(n) where n is the dimension of the square matrix)
|
||||
* as the LU decomposition has already been computed.
|
||||
*
|
||||
* \note This is only for square matrices.
|
||||
*
|
||||
* \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
|
||||
* optimized paths.
|
||||
*
|
||||
* \warning a determinant can be very big or small, so for matrices
|
||||
* of large enough dimension, there is a risk of overflow/underflow.
|
||||
*
|
||||
* \sa MatrixBase::determinant()
|
||||
*/
|
||||
typename internal::traits<MatrixType>::Scalar determinant() const;
|
||||
|
||||
/** Allows to prescribe a threshold to be used by certain methods, such as rank(),
|
||||
* who need to determine when pivots are to be considered nonzero. This is not used for the
|
||||
* LU decomposition itself.
|
||||
*
|
||||
* When it needs to get the threshold value, Eigen calls threshold(). By default, this
|
||||
* uses a formula to automatically determine a reasonable threshold.
|
||||
* Once you have called the present method setThreshold(const RealScalar&),
|
||||
* your value is used instead.
|
||||
*
|
||||
* \param threshold The new value to use as the threshold.
|
||||
*
|
||||
* A pivot will be considered nonzero if its absolute value is strictly greater than
|
||||
* \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$
|
||||
* where maxpivot is the biggest pivot.
|
||||
*
|
||||
* If you want to come back to the default behavior, call setThreshold(Default_t)
|
||||
*/
|
||||
FullPivLU& setThreshold(const RealScalar& threshold)
|
||||
{
|
||||
m_usePrescribedThreshold = true;
|
||||
m_prescribedThreshold = threshold;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Allows to come back to the default behavior, letting Eigen use its default formula for
|
||||
* determining the threshold.
|
||||
*
|
||||
* You should pass the special object Eigen::Default as parameter here.
|
||||
* \code lu.setThreshold(Eigen::Default); \endcode
|
||||
*
|
||||
* See the documentation of setThreshold(const RealScalar&).
|
||||
*/
|
||||
FullPivLU& setThreshold(Default_t)
|
||||
{
|
||||
m_usePrescribedThreshold = false;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Returns the threshold that will be used by certain methods such as rank().
|
||||
*
|
||||
* See the documentation of setThreshold(const RealScalar&).
|
||||
*/
|
||||
RealScalar threshold() const
|
||||
{
|
||||
eigen_assert(m_isInitialized || m_usePrescribedThreshold);
|
||||
return m_usePrescribedThreshold ? m_prescribedThreshold
|
||||
// this formula comes from experimenting (see "LU precision tuning" thread on the list)
|
||||
// and turns out to be identical to Higham's formula used already in LDLt.
|
||||
: NumTraits<Scalar>::epsilon() * m_lu.diagonalSize();
|
||||
}
|
||||
|
||||
/** \returns the rank of the matrix of which *this is the LU decomposition.
|
||||
*
|
||||
* \note This method has to determine which pivots should be considered nonzero.
|
||||
* For that, it uses the threshold value that you can control by calling
|
||||
* setThreshold(const RealScalar&).
|
||||
*/
|
||||
inline Index rank() const
|
||||
{
|
||||
using std::abs;
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold();
|
||||
Index result = 0;
|
||||
for(Index i = 0; i < m_nonzero_pivots; ++i)
|
||||
result += (abs(m_lu.coeff(i,i)) > premultiplied_threshold);
|
||||
return result;
|
||||
}
|
||||
|
||||
/** \returns the dimension of the kernel of the matrix of which *this is the LU decomposition.
|
||||
*
|
||||
* \note This method has to determine which pivots should be considered nonzero.
|
||||
* For that, it uses the threshold value that you can control by calling
|
||||
* setThreshold(const RealScalar&).
|
||||
*/
|
||||
inline Index dimensionOfKernel() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return cols() - rank();
|
||||
}
|
||||
|
||||
/** \returns true if the matrix of which *this is the LU decomposition represents an injective
|
||||
* linear map, i.e. has trivial kernel; false otherwise.
|
||||
*
|
||||
* \note This method has to determine which pivots should be considered nonzero.
|
||||
* For that, it uses the threshold value that you can control by calling
|
||||
* setThreshold(const RealScalar&).
|
||||
*/
|
||||
inline bool isInjective() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return rank() == cols();
|
||||
}
|
||||
|
||||
/** \returns true if the matrix of which *this is the LU decomposition represents a surjective
|
||||
* linear map; false otherwise.
|
||||
*
|
||||
* \note This method has to determine which pivots should be considered nonzero.
|
||||
* For that, it uses the threshold value that you can control by calling
|
||||
* setThreshold(const RealScalar&).
|
||||
*/
|
||||
inline bool isSurjective() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return rank() == rows();
|
||||
}
|
||||
|
||||
/** \returns true if the matrix of which *this is the LU decomposition is invertible.
|
||||
*
|
||||
* \note This method has to determine which pivots should be considered nonzero.
|
||||
* For that, it uses the threshold value that you can control by calling
|
||||
* setThreshold(const RealScalar&).
|
||||
*/
|
||||
inline bool isInvertible() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
return isInjective() && (m_lu.rows() == m_lu.cols());
|
||||
}
|
||||
|
||||
/** \returns the inverse of the matrix of which *this is the LU decomposition.
|
||||
*
|
||||
* \note If this matrix is not invertible, the returned matrix has undefined coefficients.
|
||||
* Use isInvertible() to first determine whether this matrix is invertible.
|
||||
*
|
||||
* \sa MatrixBase::inverse()
|
||||
*/
|
||||
inline const Inverse<FullPivLU> inverse() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!");
|
||||
return Inverse<FullPivLU>(*this);
|
||||
}
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename RhsType, typename DstType>
|
||||
void _solve_impl(const RhsType &rhs, DstType &dst) const;
|
||||
|
||||
template<bool Conjugate, typename RhsType, typename DstType>
|
||||
void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
void computeInPlace();
|
||||
|
||||
MatrixType m_lu;
|
||||
PermutationPType m_p;
|
||||
PermutationQType m_q;
|
||||
IntColVectorType m_rowsTranspositions;
|
||||
IntRowVectorType m_colsTranspositions;
|
||||
Index m_nonzero_pivots;
|
||||
RealScalar m_l1_norm;
|
||||
RealScalar m_maxpivot, m_prescribedThreshold;
|
||||
signed char m_det_pq;
|
||||
bool m_isInitialized, m_usePrescribedThreshold;
|
||||
};
|
||||
|
||||
template<typename MatrixType>
|
||||
FullPivLU<MatrixType>::FullPivLU()
|
||||
: m_isInitialized(false), m_usePrescribedThreshold(false)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
FullPivLU<MatrixType>::FullPivLU(Index rows, Index cols)
|
||||
: m_lu(rows, cols),
|
||||
m_p(rows),
|
||||
m_q(cols),
|
||||
m_rowsTranspositions(rows),
|
||||
m_colsTranspositions(cols),
|
||||
m_isInitialized(false),
|
||||
m_usePrescribedThreshold(false)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
template<typename InputType>
|
||||
FullPivLU<MatrixType>::FullPivLU(const EigenBase<InputType>& matrix)
|
||||
: m_lu(matrix.rows(), matrix.cols()),
|
||||
m_p(matrix.rows()),
|
||||
m_q(matrix.cols()),
|
||||
m_rowsTranspositions(matrix.rows()),
|
||||
m_colsTranspositions(matrix.cols()),
|
||||
m_isInitialized(false),
|
||||
m_usePrescribedThreshold(false)
|
||||
{
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
template<typename InputType>
|
||||
FullPivLU<MatrixType>::FullPivLU(EigenBase<InputType>& matrix)
|
||||
: m_lu(matrix.derived()),
|
||||
m_p(matrix.rows()),
|
||||
m_q(matrix.cols()),
|
||||
m_rowsTranspositions(matrix.rows()),
|
||||
m_colsTranspositions(matrix.cols()),
|
||||
m_isInitialized(false),
|
||||
m_usePrescribedThreshold(false)
|
||||
{
|
||||
computeInPlace();
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
void FullPivLU<MatrixType>::computeInPlace()
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
// the permutations are stored as int indices, so just to be sure:
|
||||
eigen_assert(m_lu.rows()<=NumTraits<int>::highest() && m_lu.cols()<=NumTraits<int>::highest());
|
||||
|
||||
m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
|
||||
|
||||
const Index size = m_lu.diagonalSize();
|
||||
const Index rows = m_lu.rows();
|
||||
const Index cols = m_lu.cols();
|
||||
|
||||
// will store the transpositions, before we accumulate them at the end.
|
||||
// can't accumulate on-the-fly because that will be done in reverse order for the rows.
|
||||
m_rowsTranspositions.resize(m_lu.rows());
|
||||
m_colsTranspositions.resize(m_lu.cols());
|
||||
Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. m_rowsTranspositions[i]!=i
|
||||
|
||||
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
|
||||
m_maxpivot = RealScalar(0);
|
||||
|
||||
for(Index k = 0; k < size; ++k)
|
||||
{
|
||||
// First, we need to find the pivot.
|
||||
|
||||
// biggest coefficient in the remaining bottom-right corner (starting at row k, col k)
|
||||
Index row_of_biggest_in_corner, col_of_biggest_in_corner;
|
||||
typedef internal::scalar_score_coeff_op<Scalar> Scoring;
|
||||
typedef typename Scoring::result_type Score;
|
||||
Score biggest_in_corner;
|
||||
biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k)
|
||||
.unaryExpr(Scoring())
|
||||
.maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
|
||||
row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
|
||||
col_of_biggest_in_corner += k; // need to add k to them.
|
||||
|
||||
if(biggest_in_corner==Score(0))
|
||||
{
|
||||
// before exiting, make sure to initialize the still uninitialized transpositions
|
||||
// in a sane state without destroying what we already have.
|
||||
m_nonzero_pivots = k;
|
||||
for(Index i = k; i < size; ++i)
|
||||
{
|
||||
m_rowsTranspositions.coeffRef(i) = i;
|
||||
m_colsTranspositions.coeffRef(i) = i;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
RealScalar abs_pivot = internal::abs_knowing_score<Scalar>()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner);
|
||||
if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot;
|
||||
|
||||
// Now that we've found the pivot, we need to apply the row/col swaps to
|
||||
// bring it to the location (k,k).
|
||||
|
||||
m_rowsTranspositions.coeffRef(k) = row_of_biggest_in_corner;
|
||||
m_colsTranspositions.coeffRef(k) = col_of_biggest_in_corner;
|
||||
if(k != row_of_biggest_in_corner) {
|
||||
m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner));
|
||||
++number_of_transpositions;
|
||||
}
|
||||
if(k != col_of_biggest_in_corner) {
|
||||
m_lu.col(k).swap(m_lu.col(col_of_biggest_in_corner));
|
||||
++number_of_transpositions;
|
||||
}
|
||||
|
||||
// Now that the pivot is at the right location, we update the remaining
|
||||
// bottom-right corner by Gaussian elimination.
|
||||
|
||||
if(k<rows-1)
|
||||
m_lu.col(k).tail(rows-k-1) /= m_lu.coeff(k,k);
|
||||
if(k<size-1)
|
||||
m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).tail(rows-k-1) * m_lu.row(k).tail(cols-k-1);
|
||||
}
|
||||
|
||||
// the main loop is over, we still have to accumulate the transpositions to find the
|
||||
// permutations P and Q
|
||||
|
||||
m_p.setIdentity(rows);
|
||||
for(Index k = size-1; k >= 0; --k)
|
||||
m_p.applyTranspositionOnTheRight(k, m_rowsTranspositions.coeff(k));
|
||||
|
||||
m_q.setIdentity(cols);
|
||||
for(Index k = 0; k < size; ++k)
|
||||
m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));
|
||||
|
||||
m_det_pq = (number_of_transpositions%2) ? -1 : 1;
|
||||
|
||||
m_isInitialized = true;
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
typename internal::traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the determinant of a non-square matrix!");
|
||||
return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod());
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
* i.e., it returns the product: \f$ P^{-1} L U Q^{-1} \f$.
|
||||
* This function is provided for debug purposes. */
|
||||
template<typename MatrixType>
|
||||
MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
const Index smalldim = (std::min)(m_lu.rows(), m_lu.cols());
|
||||
// LU
|
||||
MatrixType res(m_lu.rows(),m_lu.cols());
|
||||
// FIXME the .toDenseMatrix() should not be needed...
|
||||
res = m_lu.leftCols(smalldim)
|
||||
.template triangularView<UnitLower>().toDenseMatrix()
|
||||
* m_lu.topRows(smalldim)
|
||||
.template triangularView<Upper>().toDenseMatrix();
|
||||
|
||||
// P^{-1}(LU)
|
||||
res = m_p.inverse() * res;
|
||||
|
||||
// (P^{-1}LU)Q^{-1}
|
||||
res = res * m_q.inverse();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/********* Implementation of kernel() **************************************************/
|
||||
|
||||
namespace internal {
|
||||
template<typename _MatrixType>
|
||||
struct kernel_retval<FullPivLU<_MatrixType> >
|
||||
: kernel_retval_base<FullPivLU<_MatrixType> >
|
||||
{
|
||||
EIGEN_MAKE_KERNEL_HELPERS(FullPivLU<_MatrixType>)
|
||||
|
||||
enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(
|
||||
MatrixType::MaxColsAtCompileTime,
|
||||
MatrixType::MaxRowsAtCompileTime)
|
||||
};
|
||||
|
||||
template<typename Dest> void evalTo(Dest& dst) const
|
||||
{
|
||||
using std::abs;
|
||||
const Index cols = dec().matrixLU().cols(), dimker = cols - rank();
|
||||
if(dimker == 0)
|
||||
{
|
||||
// The Kernel is just {0}, so it doesn't have a basis properly speaking, but let's
|
||||
// avoid crashing/asserting as that depends on floating point calculations. Let's
|
||||
// just return a single column vector filled with zeros.
|
||||
dst.setZero();
|
||||
return;
|
||||
}
|
||||
|
||||
/* Let us use the following lemma:
|
||||
*
|
||||
* Lemma: If the matrix A has the LU decomposition PAQ = LU,
|
||||
* then Ker A = Q(Ker U).
|
||||
*
|
||||
* Proof: trivial: just keep in mind that P, Q, L are invertible.
|
||||
*/
|
||||
|
||||
/* Thus, all we need to do is to compute Ker U, and then apply Q.
|
||||
*
|
||||
* U is upper triangular, with eigenvalues sorted so that any zeros appear at the end.
|
||||
* Thus, the diagonal of U ends with exactly
|
||||
* dimKer zero's. Let us use that to construct dimKer linearly
|
||||
* independent vectors in Ker U.
|
||||
*/
|
||||
|
||||
Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank());
|
||||
RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold();
|
||||
Index p = 0;
|
||||
for(Index i = 0; i < dec().nonzeroPivots(); ++i)
|
||||
if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold)
|
||||
pivots.coeffRef(p++) = i;
|
||||
eigen_internal_assert(p == rank());
|
||||
|
||||
// we construct a temporaty trapezoid matrix m, by taking the U matrix and
|
||||
// permuting the rows and cols to bring the nonnegligible pivots to the top of
|
||||
// the main diagonal. We need that to be able to apply our triangular solvers.
|
||||
// FIXME when we get triangularView-for-rectangular-matrices, this can be simplified
|
||||
Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options,
|
||||
MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime>
|
||||
m(dec().matrixLU().block(0, 0, rank(), cols));
|
||||
for(Index i = 0; i < rank(); ++i)
|
||||
{
|
||||
if(i) m.row(i).head(i).setZero();
|
||||
m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i);
|
||||
}
|
||||
m.block(0, 0, rank(), rank());
|
||||
m.block(0, 0, rank(), rank()).template triangularView<StrictlyLower>().setZero();
|
||||
for(Index i = 0; i < rank(); ++i)
|
||||
m.col(i).swap(m.col(pivots.coeff(i)));
|
||||
|
||||
// ok, we have our trapezoid matrix, we can apply the triangular solver.
|
||||
// notice that the math behind this suggests that we should apply this to the
|
||||
// negative of the RHS, but for performance we just put the negative sign elsewhere, see below.
|
||||
m.topLeftCorner(rank(), rank())
|
||||
.template triangularView<Upper>().solveInPlace(
|
||||
m.topRightCorner(rank(), dimker)
|
||||
);
|
||||
|
||||
// now we must undo the column permutation that we had applied!
|
||||
for(Index i = rank()-1; i >= 0; --i)
|
||||
m.col(i).swap(m.col(pivots.coeff(i)));
|
||||
|
||||
// see the negative sign in the next line, that's what we were talking about above.
|
||||
for(Index i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).tail(dimker);
|
||||
for(Index i = rank(); i < cols; ++i) dst.row(dec().permutationQ().indices().coeff(i)).setZero();
|
||||
for(Index k = 0; k < dimker; ++k) dst.coeffRef(dec().permutationQ().indices().coeff(rank()+k), k) = Scalar(1);
|
||||
}
|
||||
};
|
||||
|
||||
/***** Implementation of image() *****************************************************/
|
||||
|
||||
template<typename _MatrixType>
|
||||
struct image_retval<FullPivLU<_MatrixType> >
|
||||
: image_retval_base<FullPivLU<_MatrixType> >
|
||||
{
|
||||
EIGEN_MAKE_IMAGE_HELPERS(FullPivLU<_MatrixType>)
|
||||
|
||||
enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(
|
||||
MatrixType::MaxColsAtCompileTime,
|
||||
MatrixType::MaxRowsAtCompileTime)
|
||||
};
|
||||
|
||||
template<typename Dest> void evalTo(Dest& dst) const
|
||||
{
|
||||
using std::abs;
|
||||
if(rank() == 0)
|
||||
{
|
||||
// The Image is just {0}, so it doesn't have a basis properly speaking, but let's
|
||||
// avoid crashing/asserting as that depends on floating point calculations. Let's
|
||||
// just return a single column vector filled with zeros.
|
||||
dst.setZero();
|
||||
return;
|
||||
}
|
||||
|
||||
Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank());
|
||||
RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold();
|
||||
Index p = 0;
|
||||
for(Index i = 0; i < dec().nonzeroPivots(); ++i)
|
||||
if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold)
|
||||
pivots.coeffRef(p++) = i;
|
||||
eigen_internal_assert(p == rank());
|
||||
|
||||
for(Index i = 0; i < rank(); ++i)
|
||||
dst.col(i) = originalMatrix().col(dec().permutationQ().indices().coeff(pivots.coeff(i)));
|
||||
}
|
||||
};
|
||||
|
||||
/***** Implementation of solve() *****************************************************/
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename _MatrixType>
|
||||
template<typename RhsType, typename DstType>
|
||||
void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
|
||||
{
|
||||
/* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
|
||||
* So we proceed as follows:
|
||||
* Step 1: compute c = P * rhs.
|
||||
* Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
|
||||
* Step 3: replace c by the solution x to Ux = c. May or may not exist.
|
||||
* Step 4: result = Q * c;
|
||||
*/
|
||||
|
||||
const Index rows = this->rows(),
|
||||
cols = this->cols(),
|
||||
nonzero_pivots = this->rank();
|
||||
eigen_assert(rhs.rows() == rows);
|
||||
const Index smalldim = (std::min)(rows, cols);
|
||||
|
||||
if(nonzero_pivots == 0)
|
||||
{
|
||||
dst.setZero();
|
||||
return;
|
||||
}
|
||||
|
||||
typename RhsType::PlainObject c(rhs.rows(), rhs.cols());
|
||||
|
||||
// Step 1
|
||||
c = permutationP() * rhs;
|
||||
|
||||
// Step 2
|
||||
m_lu.topLeftCorner(smalldim,smalldim)
|
||||
.template triangularView<UnitLower>()
|
||||
.solveInPlace(c.topRows(smalldim));
|
||||
if(rows>cols)
|
||||
c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols);
|
||||
|
||||
// Step 3
|
||||
m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
|
||||
.template triangularView<Upper>()
|
||||
.solveInPlace(c.topRows(nonzero_pivots));
|
||||
|
||||
// Step 4
|
||||
for(Index i = 0; i < nonzero_pivots; ++i)
|
||||
dst.row(permutationQ().indices().coeff(i)) = c.row(i);
|
||||
for(Index i = nonzero_pivots; i < m_lu.cols(); ++i)
|
||||
dst.row(permutationQ().indices().coeff(i)).setZero();
|
||||
}
|
||||
|
||||
template<typename _MatrixType>
|
||||
template<bool Conjugate, typename RhsType, typename DstType>
|
||||
void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
|
||||
{
|
||||
/* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1},
|
||||
* and since permutations are real and unitary, we can write this
|
||||
* as A^T = Q U^T L^T P,
|
||||
* So we proceed as follows:
|
||||
* Step 1: compute c = Q^T rhs.
|
||||
* Step 2: replace c by the solution x to U^T x = c. May or may not exist.
|
||||
* Step 3: replace c by the solution x to L^T x = c.
|
||||
* Step 4: result = P^T c.
|
||||
* If Conjugate is true, replace "^T" by "^*" above.
|
||||
*/
|
||||
|
||||
const Index rows = this->rows(), cols = this->cols(),
|
||||
nonzero_pivots = this->rank();
|
||||
eigen_assert(rhs.rows() == cols);
|
||||
const Index smalldim = (std::min)(rows, cols);
|
||||
|
||||
if(nonzero_pivots == 0)
|
||||
{
|
||||
dst.setZero();
|
||||
return;
|
||||
}
|
||||
|
||||
typename RhsType::PlainObject c(rhs.rows(), rhs.cols());
|
||||
|
||||
// Step 1
|
||||
c = permutationQ().inverse() * rhs;
|
||||
|
||||
if (Conjugate) {
|
||||
// Step 2
|
||||
m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
|
||||
.template triangularView<Upper>()
|
||||
.adjoint()
|
||||
.solveInPlace(c.topRows(nonzero_pivots));
|
||||
// Step 3
|
||||
m_lu.topLeftCorner(smalldim, smalldim)
|
||||
.template triangularView<UnitLower>()
|
||||
.adjoint()
|
||||
.solveInPlace(c.topRows(smalldim));
|
||||
} else {
|
||||
// Step 2
|
||||
m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
|
||||
.template triangularView<Upper>()
|
||||
.transpose()
|
||||
.solveInPlace(c.topRows(nonzero_pivots));
|
||||
// Step 3
|
||||
m_lu.topLeftCorner(smalldim, smalldim)
|
||||
.template triangularView<UnitLower>()
|
||||
.transpose()
|
||||
.solveInPlace(c.topRows(smalldim));
|
||||
}
|
||||
|
||||
// Step 4
|
||||
PermutationPType invp = permutationP().inverse().eval();
|
||||
for(Index i = 0; i < smalldim; ++i)
|
||||
dst.row(invp.indices().coeff(i)) = c.row(i);
|
||||
for(Index i = smalldim; i < rows; ++i)
|
||||
dst.row(invp.indices().coeff(i)).setZero();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
/***** Implementation of inverse() *****************************************************/
|
||||
template<typename DstXprType, typename MatrixType>
|
||||
struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivLU<MatrixType>::Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef FullPivLU<MatrixType> LuType;
|
||||
typedef Inverse<LuType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &)
|
||||
{
|
||||
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
|
||||
}
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
/******* MatrixBase methods *****************************************************************/
|
||||
|
||||
/** \lu_module
|
||||
*
|
||||
* \return the full-pivoting LU decomposition of \c *this.
|
||||
*
|
||||
* \sa class FullPivLU
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
|
||||
MatrixBase<Derived>::fullPivLu() const
|
||||
{
|
||||
return FullPivLU<PlainObject>(eval());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_LU_H
|
||||
415
examples/ThirdPartyLibs/Eigen/src/LU/InverseImpl.h
Normal file
415
examples/ThirdPartyLibs/Eigen/src/LU/InverseImpl.h
Normal file
@@ -0,0 +1,415 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_INVERSE_IMPL_H
|
||||
#define EIGEN_INVERSE_IMPL_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/**********************************
|
||||
*** General case implementation ***
|
||||
**********************************/
|
||||
|
||||
template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>
|
||||
struct compute_inverse
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const MatrixType& matrix, ResultType& result)
|
||||
{
|
||||
result = matrix.partialPivLu().inverse();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>
|
||||
struct compute_inverse_and_det_with_check { /* nothing! general case not supported. */ };
|
||||
|
||||
/****************************
|
||||
*** Size 1 implementation ***
|
||||
****************************/
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse<MatrixType, ResultType, 1>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const MatrixType& matrix, ResultType& result)
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
internal::evaluator<MatrixType> matrixEval(matrix);
|
||||
result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(
|
||||
const MatrixType& matrix,
|
||||
const typename MatrixType::RealScalar& absDeterminantThreshold,
|
||||
ResultType& result,
|
||||
typename ResultType::Scalar& determinant,
|
||||
bool& invertible
|
||||
)
|
||||
{
|
||||
using std::abs;
|
||||
determinant = matrix.coeff(0,0);
|
||||
invertible = abs(determinant) > absDeterminantThreshold;
|
||||
if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant;
|
||||
}
|
||||
};
|
||||
|
||||
/****************************
|
||||
*** Size 2 implementation ***
|
||||
****************************/
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void compute_inverse_size2_helper(
|
||||
const MatrixType& matrix, const typename ResultType::Scalar& invdet,
|
||||
ResultType& result)
|
||||
{
|
||||
result.coeffRef(0,0) = matrix.coeff(1,1) * invdet;
|
||||
result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
|
||||
result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
|
||||
result.coeffRef(1,1) = matrix.coeff(0,0) * invdet;
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse<MatrixType, ResultType, 2>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const MatrixType& matrix, ResultType& result)
|
||||
{
|
||||
typedef typename ResultType::Scalar Scalar;
|
||||
const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant();
|
||||
compute_inverse_size2_helper(matrix, invdet, result);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(
|
||||
const MatrixType& matrix,
|
||||
const typename MatrixType::RealScalar& absDeterminantThreshold,
|
||||
ResultType& inverse,
|
||||
typename ResultType::Scalar& determinant,
|
||||
bool& invertible
|
||||
)
|
||||
{
|
||||
using std::abs;
|
||||
typedef typename ResultType::Scalar Scalar;
|
||||
determinant = matrix.determinant();
|
||||
invertible = abs(determinant) > absDeterminantThreshold;
|
||||
if(!invertible) return;
|
||||
const Scalar invdet = Scalar(1) / determinant;
|
||||
compute_inverse_size2_helper(matrix, invdet, inverse);
|
||||
}
|
||||
};
|
||||
|
||||
/****************************
|
||||
*** Size 3 implementation ***
|
||||
****************************/
|
||||
|
||||
template<typename MatrixType, int i, int j>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m)
|
||||
{
|
||||
enum {
|
||||
i1 = (i+1) % 3,
|
||||
i2 = (i+2) % 3,
|
||||
j1 = (j+1) % 3,
|
||||
j2 = (j+2) % 3
|
||||
};
|
||||
return m.coeff(i1, j1) * m.coeff(i2, j2)
|
||||
- m.coeff(i1, j2) * m.coeff(i2, j1);
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void compute_inverse_size3_helper(
|
||||
const MatrixType& matrix,
|
||||
const typename ResultType::Scalar& invdet,
|
||||
const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,
|
||||
ResultType& result)
|
||||
{
|
||||
result.row(0) = cofactors_col0 * invdet;
|
||||
result.coeffRef(1,0) = cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
|
||||
result.coeffRef(1,1) = cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
|
||||
result.coeffRef(1,2) = cofactor_3x3<MatrixType,2,1>(matrix) * invdet;
|
||||
result.coeffRef(2,0) = cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
|
||||
result.coeffRef(2,1) = cofactor_3x3<MatrixType,1,2>(matrix) * invdet;
|
||||
result.coeffRef(2,2) = cofactor_3x3<MatrixType,2,2>(matrix) * invdet;
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse<MatrixType, ResultType, 3>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const MatrixType& matrix, ResultType& result)
|
||||
{
|
||||
typedef typename ResultType::Scalar Scalar;
|
||||
Matrix<typename MatrixType::Scalar,3,1> cofactors_col0;
|
||||
cofactors_col0.coeffRef(0) = cofactor_3x3<MatrixType,0,0>(matrix);
|
||||
cofactors_col0.coeffRef(1) = cofactor_3x3<MatrixType,1,0>(matrix);
|
||||
cofactors_col0.coeffRef(2) = cofactor_3x3<MatrixType,2,0>(matrix);
|
||||
const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
|
||||
const Scalar invdet = Scalar(1) / det;
|
||||
compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(
|
||||
const MatrixType& matrix,
|
||||
const typename MatrixType::RealScalar& absDeterminantThreshold,
|
||||
ResultType& inverse,
|
||||
typename ResultType::Scalar& determinant,
|
||||
bool& invertible
|
||||
)
|
||||
{
|
||||
using std::abs;
|
||||
typedef typename ResultType::Scalar Scalar;
|
||||
Matrix<Scalar,3,1> cofactors_col0;
|
||||
cofactors_col0.coeffRef(0) = cofactor_3x3<MatrixType,0,0>(matrix);
|
||||
cofactors_col0.coeffRef(1) = cofactor_3x3<MatrixType,1,0>(matrix);
|
||||
cofactors_col0.coeffRef(2) = cofactor_3x3<MatrixType,2,0>(matrix);
|
||||
determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
|
||||
invertible = abs(determinant) > absDeterminantThreshold;
|
||||
if(!invertible) return;
|
||||
const Scalar invdet = Scalar(1) / determinant;
|
||||
compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);
|
||||
}
|
||||
};
|
||||
|
||||
/****************************
|
||||
*** Size 4 implementation ***
|
||||
****************************/
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const typename Derived::Scalar general_det3_helper
|
||||
(const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3)
|
||||
{
|
||||
return matrix.coeff(i1,j1)
|
||||
* (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2));
|
||||
}
|
||||
|
||||
template<typename MatrixType, int i, int j>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix)
|
||||
{
|
||||
enum {
|
||||
i1 = (i+1) % 4,
|
||||
i2 = (i+2) % 4,
|
||||
i3 = (i+3) % 4,
|
||||
j1 = (j+1) % 4,
|
||||
j2 = (j+2) % 4,
|
||||
j3 = (j+3) % 4
|
||||
};
|
||||
return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3)
|
||||
+ general_det3_helper(matrix, i2, i3, i1, j1, j2, j3)
|
||||
+ general_det3_helper(matrix, i3, i1, i2, j1, j2, j3);
|
||||
}
|
||||
|
||||
template<int Arch, typename Scalar, typename MatrixType, typename ResultType>
|
||||
struct compute_inverse_size4
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static void run(const MatrixType& matrix, ResultType& result)
|
||||
{
|
||||
result.coeffRef(0,0) = cofactor_4x4<MatrixType,0,0>(matrix);
|
||||
result.coeffRef(1,0) = -cofactor_4x4<MatrixType,0,1>(matrix);
|
||||
result.coeffRef(2,0) = cofactor_4x4<MatrixType,0,2>(matrix);
|
||||
result.coeffRef(3,0) = -cofactor_4x4<MatrixType,0,3>(matrix);
|
||||
result.coeffRef(0,2) = cofactor_4x4<MatrixType,2,0>(matrix);
|
||||
result.coeffRef(1,2) = -cofactor_4x4<MatrixType,2,1>(matrix);
|
||||
result.coeffRef(2,2) = cofactor_4x4<MatrixType,2,2>(matrix);
|
||||
result.coeffRef(3,2) = -cofactor_4x4<MatrixType,2,3>(matrix);
|
||||
result.coeffRef(0,1) = -cofactor_4x4<MatrixType,1,0>(matrix);
|
||||
result.coeffRef(1,1) = cofactor_4x4<MatrixType,1,1>(matrix);
|
||||
result.coeffRef(2,1) = -cofactor_4x4<MatrixType,1,2>(matrix);
|
||||
result.coeffRef(3,1) = cofactor_4x4<MatrixType,1,3>(matrix);
|
||||
result.coeffRef(0,3) = -cofactor_4x4<MatrixType,3,0>(matrix);
|
||||
result.coeffRef(1,3) = cofactor_4x4<MatrixType,3,1>(matrix);
|
||||
result.coeffRef(2,3) = -cofactor_4x4<MatrixType,3,2>(matrix);
|
||||
result.coeffRef(3,3) = cofactor_4x4<MatrixType,3,3>(matrix);
|
||||
result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse<MatrixType, ResultType, 4>
|
||||
: compute_inverse_size4<Architecture::Target, typename MatrixType::Scalar,
|
||||
MatrixType, ResultType>
|
||||
{
|
||||
};
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(
|
||||
const MatrixType& matrix,
|
||||
const typename MatrixType::RealScalar& absDeterminantThreshold,
|
||||
ResultType& inverse,
|
||||
typename ResultType::Scalar& determinant,
|
||||
bool& invertible
|
||||
)
|
||||
{
|
||||
using std::abs;
|
||||
determinant = matrix.determinant();
|
||||
invertible = abs(determinant) > absDeterminantThreshold;
|
||||
if(invertible) compute_inverse<MatrixType, ResultType>::run(matrix, inverse);
|
||||
}
|
||||
};
|
||||
|
||||
/*************************
|
||||
*** MatrixBase methods ***
|
||||
*************************/
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Specialization for "dense = dense_xpr.inverse()"
|
||||
template<typename DstXprType, typename XprType>
|
||||
struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Inverse<XprType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(Size);
|
||||
eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))
|
||||
&& "Aliasing problem detected in inverse(), you need to do inverse().eval() here.");
|
||||
|
||||
typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type ActualXprType;
|
||||
typedef typename internal::remove_all<ActualXprType>::type ActualXprTypeCleanded;
|
||||
|
||||
ActualXprType actual_xpr(src.nestedExpression());
|
||||
|
||||
compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \lu_module
|
||||
*
|
||||
* \returns the matrix inverse of this matrix.
|
||||
*
|
||||
* For small fixed sizes up to 4x4, this method uses cofactors.
|
||||
* In the general case, this method uses class PartialPivLU.
|
||||
*
|
||||
* \note This matrix must be invertible, otherwise the result is undefined. If you need an
|
||||
* invertibility check, do the following:
|
||||
* \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck().
|
||||
* \li for the general case, use class FullPivLU.
|
||||
*
|
||||
* Example: \include MatrixBase_inverse.cpp
|
||||
* Output: \verbinclude MatrixBase_inverse.out
|
||||
*
|
||||
* \sa computeInverseAndDetWithCheck()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const Inverse<Derived> MatrixBase<Derived>::inverse() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
|
||||
eigen_assert(rows() == cols());
|
||||
return Inverse<Derived>(derived());
|
||||
}
|
||||
|
||||
/** \lu_module
|
||||
*
|
||||
* Computation of matrix inverse and determinant, with invertibility check.
|
||||
*
|
||||
* This is only for fixed-size square matrices of size up to 4x4.
|
||||
*
|
||||
* \param inverse Reference to the matrix in which to store the inverse.
|
||||
* \param determinant Reference to the variable in which to store the determinant.
|
||||
* \param invertible Reference to the bool variable in which to store whether the matrix is invertible.
|
||||
* \param absDeterminantThreshold Optional parameter controlling the invertibility check.
|
||||
* The matrix will be declared invertible if the absolute value of its
|
||||
* determinant is greater than this threshold.
|
||||
*
|
||||
* Example: \include MatrixBase_computeInverseAndDetWithCheck.cpp
|
||||
* Output: \verbinclude MatrixBase_computeInverseAndDetWithCheck.out
|
||||
*
|
||||
* \sa inverse(), computeInverseWithCheck()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename ResultType>
|
||||
inline void MatrixBase<Derived>::computeInverseAndDetWithCheck(
|
||||
ResultType& inverse,
|
||||
typename ResultType::Scalar& determinant,
|
||||
bool& invertible,
|
||||
const RealScalar& absDeterminantThreshold
|
||||
) const
|
||||
{
|
||||
// i'd love to put some static assertions there, but SFINAE means that they have no effect...
|
||||
eigen_assert(rows() == cols());
|
||||
// for 2x2, it's worth giving a chance to avoid evaluating.
|
||||
// for larger sizes, evaluating has negligible cost and limits code size.
|
||||
typedef typename internal::conditional<
|
||||
RowsAtCompileTime == 2,
|
||||
typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type,
|
||||
PlainObject
|
||||
>::type MatrixType;
|
||||
internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run
|
||||
(derived(), absDeterminantThreshold, inverse, determinant, invertible);
|
||||
}
|
||||
|
||||
/** \lu_module
|
||||
*
|
||||
* Computation of matrix inverse, with invertibility check.
|
||||
*
|
||||
* This is only for fixed-size square matrices of size up to 4x4.
|
||||
*
|
||||
* \param inverse Reference to the matrix in which to store the inverse.
|
||||
* \param invertible Reference to the bool variable in which to store whether the matrix is invertible.
|
||||
* \param absDeterminantThreshold Optional parameter controlling the invertibility check.
|
||||
* The matrix will be declared invertible if the absolute value of its
|
||||
* determinant is greater than this threshold.
|
||||
*
|
||||
* Example: \include MatrixBase_computeInverseWithCheck.cpp
|
||||
* Output: \verbinclude MatrixBase_computeInverseWithCheck.out
|
||||
*
|
||||
* \sa inverse(), computeInverseAndDetWithCheck()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename ResultType>
|
||||
inline void MatrixBase<Derived>::computeInverseWithCheck(
|
||||
ResultType& inverse,
|
||||
bool& invertible,
|
||||
const RealScalar& absDeterminantThreshold
|
||||
) const
|
||||
{
|
||||
RealScalar determinant;
|
||||
// i'd love to put some static assertions there, but SFINAE means that they have no effect...
|
||||
eigen_assert(rows() == cols());
|
||||
computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_INVERSE_IMPL_H
|
||||
611
examples/ThirdPartyLibs/Eigen/src/LU/PartialPivLU.h
Normal file
@@ -0,0 +1,611 @@
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PARTIALLU_H
|
||||
#define EIGEN_PARTIALLU_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
|
||||
: traits<_MatrixType>
|
||||
{
|
||||
typedef MatrixXpr XprKind;
|
||||
typedef SolverStorage StorageKind;
|
||||
typedef traits<_MatrixType> BaseTraits;
|
||||
enum {
|
||||
Flags = BaseTraits::Flags & RowMajorBit,
|
||||
CoeffReadCost = Dynamic
|
||||
};
|
||||
};
|
||||
|
||||
template<typename T,typename Derived>
|
||||
struct enable_if_ref;
|
||||
// {
|
||||
// typedef Derived type;
|
||||
// };
|
||||
|
||||
template<typename T,typename Derived>
|
||||
struct enable_if_ref<Ref<T>,Derived> {
|
||||
typedef Derived type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \ingroup LU_Module
|
||||
*
|
||||
* \class PartialPivLU
|
||||
*
|
||||
* \brief LU decomposition of a matrix with partial pivoting, and related features
|
||||
*
|
||||
* \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
|
||||
*
|
||||
* This class represents an LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
|
||||
* is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P
|
||||
* is a permutation matrix.
|
||||
*
|
||||
* Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible
|
||||
* matrices. Thus LAPACK's dgesv and dgesvx require the matrix to be square and invertible. The present class
|
||||
* does the same. It will assert that the matrix is square, but it won't (actually it can't) check that the
|
||||
* matrix is invertible: it is your task to check that you only use this decomposition on invertible matrices.
|
||||
*
|
||||
* The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided
|
||||
* by class FullPivLU.
|
||||
*
|
||||
* This is \b not a rank-revealing LU decomposition. Many features are intentionally absent from this class,
|
||||
* such as rank computation. If you need these features, use class FullPivLU.
|
||||
*
|
||||
* This LU decomposition is suitable to invert invertible matrices. It is what MatrixBase::inverse() uses
|
||||
* in the general case.
|
||||
* On the other hand, it is \b not suitable to determine whether a given matrix is invertible.
|
||||
*
|
||||
* The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
|
||||
*
|
||||
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
|
||||
*
|
||||
* \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU
|
||||
*/
|
||||
template<typename _MatrixType> class PartialPivLU
|
||||
: public SolverBase<PartialPivLU<_MatrixType> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
typedef SolverBase<PartialPivLU> Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
|
||||
// FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
|
||||
enum {
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
||||
};
|
||||
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
|
||||
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
/**
|
||||
* \brief Default Constructor.
|
||||
*
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via PartialPivLU::compute(const MatrixType&).
|
||||
*/
|
||||
PartialPivLU();
|
||||
|
||||
/** \brief Default Constructor with memory preallocation
|
||||
*
|
||||
* Like the default constructor but with preallocation of the internal data
|
||||
* according to the specified problem \a size.
|
||||
* \sa PartialPivLU()
|
||||
*/
|
||||
explicit PartialPivLU(Index size);
|
||||
|
||||
/** Constructor.
|
||||
*
|
||||
* \param matrix the matrix of which to compute the LU decomposition.
|
||||
*
|
||||
* \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
|
||||
* If you need to deal with non-full rank, use class FullPivLU instead.
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit PartialPivLU(const EigenBase<InputType>& matrix);
|
||||
|
||||
/** Constructor for \link InplaceDecomposition inplace decomposition \endlink
|
||||
*
|
||||
* \param matrix the matrix of which to compute the LU decomposition.
|
||||
*
|
||||
* \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
|
||||
* If you need to deal with non-full rank, use class FullPivLU instead.
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit PartialPivLU(EigenBase<InputType>& matrix);
|
||||
|
||||
template<typename InputType>
|
||||
PartialPivLU& compute(const EigenBase<InputType>& matrix) {
|
||||
m_lu = matrix.derived();
|
||||
compute();
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** \returns the LU decomposition matrix: the upper-triangular part is U, the
|
||||
* unit-lower-triangular part is L (at least for square matrices; in the non-square
|
||||
* case, special care is needed, see the documentation of class FullPivLU).
|
||||
*
|
||||
* \sa matrixL(), matrixU()
|
||||
*/
|
||||
inline const MatrixType& matrixLU() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
|
||||
return m_lu;
|
||||
}
|
||||
|
||||
/** \returns the permutation matrix P.
|
||||
*/
|
||||
inline const PermutationType& permutationP() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
|
||||
return m_p;
|
||||
}
|
||||
|
||||
/** This method returns the solution x to the equation Ax=b, where A is the matrix of which
|
||||
* *this is the LU decomposition.
|
||||
*
|
||||
* \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
|
||||
* the only requirement in order for the equation to make sense is that
|
||||
* b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
|
||||
*
|
||||
* \returns the solution.
|
||||
*
|
||||
* Example: \include PartialPivLU_solve.cpp
|
||||
* Output: \verbinclude PartialPivLU_solve.out
|
||||
*
|
||||
* Since this PartialPivLU class assumes anyway that the matrix A is invertible, the solution
|
||||
* theoretically exists and is unique regardless of b.
|
||||
*
|
||||
* \sa TriangularView::solve(), inverse(), computeInverse()
|
||||
*/
|
||||
// FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
|
||||
template<typename Rhs>
|
||||
inline const Solve<PartialPivLU, Rhs>
|
||||
solve(const MatrixBase<Rhs>& b) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
|
||||
return Solve<PartialPivLU, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
/** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
|
||||
the LU decomposition.
|
||||
*/
|
||||
inline RealScalar rcond() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
|
||||
return internal::rcond_estimate_helper(m_l1_norm, *this);
|
||||
}
|
||||
|
||||
/** \returns the inverse of the matrix of which *this is the LU decomposition.
|
||||
*
|
||||
* \warning The matrix being decomposed here is assumed to be invertible. If you need to check for
|
||||
* invertibility, use class FullPivLU instead.
|
||||
*
|
||||
* \sa MatrixBase::inverse(), LU::inverse()
|
||||
*/
|
||||
inline const Inverse<PartialPivLU> inverse() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
|
||||
return Inverse<PartialPivLU>(*this);
|
||||
}
|
||||
|
||||
/** \returns the determinant of the matrix of which
|
||||
* *this is the LU decomposition. It has only linear complexity
|
||||
* (that is, O(n) where n is the dimension of the square matrix)
|
||||
* as the LU decomposition has already been computed.
|
||||
*
|
||||
* \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
|
||||
* optimized paths.
|
||||
*
|
||||
* \warning a determinant can be very big or small, so for matrices
|
||||
* of large enough dimension, there is a risk of overflow/underflow.
|
||||
*
|
||||
* \sa MatrixBase::determinant()
|
||||
*/
|
||||
Scalar determinant() const;
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
inline Index rows() const { return m_lu.rows(); }
|
||||
inline Index cols() const { return m_lu.cols(); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename RhsType, typename DstType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void _solve_impl(const RhsType &rhs, DstType &dst) const {
|
||||
/* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
|
||||
* So we proceed as follows:
|
||||
* Step 1: compute c = Pb.
|
||||
* Step 2: replace c by the solution x to Lx = c.
|
||||
* Step 3: replace c by the solution x to Ux = c.
|
||||
*/
|
||||
|
||||
eigen_assert(rhs.rows() == m_lu.rows());
|
||||
|
||||
// Step 1
|
||||
dst = permutationP() * rhs;
|
||||
|
||||
// Step 2
|
||||
m_lu.template triangularView<UnitLower>().solveInPlace(dst);
|
||||
|
||||
// Step 3
|
||||
m_lu.template triangularView<Upper>().solveInPlace(dst);
|
||||
}
|
||||
|
||||
template<bool Conjugate, typename RhsType, typename DstType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const {
|
||||
/* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
|
||||
* So we proceed as follows:
|
||||
* Step 1: compute c = Pb.
|
||||
* Step 2: replace c by the solution x to Lx = c.
|
||||
* Step 3: replace c by the solution x to Ux = c.
|
||||
*/
|
||||
|
||||
eigen_assert(rhs.rows() == m_lu.cols());
|
||||
|
||||
if (Conjugate) {
|
||||
// Step 1
|
||||
dst = m_lu.template triangularView<Upper>().adjoint().solve(rhs);
|
||||
// Step 2
|
||||
m_lu.template triangularView<UnitLower>().adjoint().solveInPlace(dst);
|
||||
} else {
|
||||
// Step 1
|
||||
dst = m_lu.template triangularView<Upper>().transpose().solve(rhs);
|
||||
// Step 2
|
||||
m_lu.template triangularView<UnitLower>().transpose().solveInPlace(dst);
|
||||
}
|
||||
// Step 3
|
||||
dst = permutationP().transpose() * dst;
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
void compute();
|
||||
|
||||
MatrixType m_lu;
|
||||
PermutationType m_p;
|
||||
TranspositionType m_rowsTranspositions;
|
||||
RealScalar m_l1_norm;
|
||||
signed char m_det_p;
|
||||
bool m_isInitialized;
|
||||
};
|
||||
|
||||
template<typename MatrixType>
|
||||
PartialPivLU<MatrixType>::PartialPivLU()
|
||||
: m_lu(),
|
||||
m_p(),
|
||||
m_rowsTranspositions(),
|
||||
m_l1_norm(0),
|
||||
m_det_p(0),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
PartialPivLU<MatrixType>::PartialPivLU(Index size)
|
||||
: m_lu(size, size),
|
||||
m_p(size),
|
||||
m_rowsTranspositions(size),
|
||||
m_l1_norm(0),
|
||||
m_det_p(0),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
template<typename InputType>
|
||||
PartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix)
|
||||
: m_lu(matrix.rows(),matrix.cols()),
|
||||
m_p(matrix.rows()),
|
||||
m_rowsTranspositions(matrix.rows()),
|
||||
m_l1_norm(0),
|
||||
m_det_p(0),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
template<typename InputType>
|
||||
PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)
|
||||
: m_lu(matrix.derived()),
|
||||
m_p(matrix.rows()),
|
||||
m_rowsTranspositions(matrix.rows()),
|
||||
m_l1_norm(0),
|
||||
m_det_p(0),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute();
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal Partial-pivoting LU kernels: an unblocked routine and its blocked counterpart. */
|
||||
template<typename Scalar, int StorageOrder, typename PivIndex>
|
||||
struct partial_lu_impl
|
||||
{
|
||||
// FIXME add a stride to Map, so that the following mapping becomes easier,
|
||||
// another option would be to create an expression being able to automatically
|
||||
// wrap any Map, Matrix, and Block expressions as a unique type, but since that's exactly
|
||||
// a Map + stride, why not adding a stride to Map, and convenient ctors from a Matrix,
|
||||
// and Block.
|
||||
typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
|
||||
typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
|
||||
typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
|
||||
/** \internal performs the LU decomposition in-place of the matrix \a lu
|
||||
* using an unblocked algorithm.
|
||||
*
|
||||
* In addition, this function returns the row transpositions in the
|
||||
* vector \a row_transpositions which must have a size equal to the number
|
||||
* of columns of the matrix \a lu, and an integer \a nb_transpositions
|
||||
* which returns the actual number of transpositions.
|
||||
*
|
||||
* \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
|
||||
*/
|
||||
static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
|
||||
{
|
||||
typedef scalar_score_coeff_op<Scalar> Scoring;
|
||||
typedef typename Scoring::result_type Score;
|
||||
const Index rows = lu.rows();
|
||||
const Index cols = lu.cols();
|
||||
const Index size = (std::min)(rows,cols);
|
||||
nb_transpositions = 0;
|
||||
Index first_zero_pivot = -1;
|
||||
for(Index k = 0; k < size; ++k)
|
||||
{
|
||||
Index rrows = rows-k-1;
|
||||
Index rcols = cols-k-1;
|
||||
|
||||
Index row_of_biggest_in_col;
|
||||
Score biggest_in_corner
|
||||
= lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col);
|
||||
row_of_biggest_in_col += k;
|
||||
|
||||
row_transpositions[k] = PivIndex(row_of_biggest_in_col);
|
||||
|
||||
if(biggest_in_corner != Score(0))
|
||||
{
|
||||
if(k != row_of_biggest_in_col)
|
||||
{
|
||||
lu.row(k).swap(lu.row(row_of_biggest_in_col));
|
||||
++nb_transpositions;
|
||||
}
|
||||
|
||||
// FIXME shall we introduce a safe quotient expression in case 1/lu.coeff(k,k)
// overflows but not the actual quotient?
|
||||
lu.col(k).tail(rrows) /= lu.coeff(k,k);
|
||||
}
|
||||
else if(first_zero_pivot==-1)
|
||||
{
|
||||
// the pivot is exactly zero, we record the index of the first pivot which is exactly 0,
|
||||
// and continue the factorization such that we still have A = PLU
|
||||
first_zero_pivot = k;
|
||||
}
|
||||
|
||||
if(k<rows-1)
|
||||
lu.bottomRightCorner(rrows,rcols).noalias() -= lu.col(k).tail(rrows) * lu.row(k).tail(rcols);
|
||||
}
|
||||
return first_zero_pivot;
|
||||
}
|
||||
|
||||
/** \internal performs the LU decomposition in-place of the matrix represented
|
||||
* by the variables \a rows, \a cols, \a lu_data, and \a lu_stride using a
|
||||
* recursive, blocked algorithm.
|
||||
*
|
||||
* In addition, this function returns the row transpositions in the
|
||||
* vector \a row_transpositions which must have a size equal to the number
|
||||
* of columns of the matrix \a lu, and an integer \a nb_transpositions
|
||||
* which returns the actual number of transpositions.
|
||||
*
|
||||
* \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
|
||||
*
|
||||
* \note This very low level interface using pointers, etc. is to:
|
||||
* 1 - reduce the number of instantiations to the strict minimum
* 2 - avoid infinite recursion of the instantiations with Block<Block<Block<...> > >
|
||||
*/
|
||||
static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)
|
||||
{
|
||||
MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols);
|
||||
MatrixType lu(lu1,0,0,rows,cols);
|
||||
|
||||
const Index size = (std::min)(rows,cols);
|
||||
|
||||
// if the matrix is too small, no blocking:
|
||||
if(size<=16)
|
||||
{
|
||||
return unblocked_lu(lu, row_transpositions, nb_transpositions);
|
||||
}
|
||||
|
||||
// automatically adjust the number of subdivisions to the size
|
||||
// of the matrix so that there are enough sub-blocks:
|
||||
Index blockSize;
|
||||
{
|
||||
blockSize = size/8;
|
||||
blockSize = (blockSize/16)*16;
|
||||
blockSize = (std::min)((std::max)(blockSize,Index(8)), maxBlockSize);
|
||||
}
|
||||
|
||||
nb_transpositions = 0;
|
||||
Index first_zero_pivot = -1;
|
||||
for(Index k = 0; k < size; k+=blockSize)
|
||||
{
|
||||
Index bs = (std::min)(size-k,blockSize); // actual size of the block
|
||||
Index trows = rows - k - bs; // trailing rows
|
||||
Index tsize = size - k - bs; // trailing size
|
||||
|
||||
// partition the matrix:
|
||||
// A00 | A01 | A02
|
||||
// lu = A_0 | A_1 | A_2 = A10 | A11 | A12
|
||||
// A20 | A21 | A22
|
||||
BlockType A_0(lu,0,0,rows,k);
|
||||
BlockType A_2(lu,0,k+bs,rows,tsize);
|
||||
BlockType A11(lu,k,k,bs,bs);
|
||||
BlockType A12(lu,k,k+bs,bs,tsize);
|
||||
BlockType A21(lu,k+bs,k,trows,bs);
|
||||
BlockType A22(lu,k+bs,k+bs,trows,tsize);
|
||||
|
||||
PivIndex nb_transpositions_in_panel;
|
||||
// recursively call the blocked LU algorithm on [A11^T A21^T]^T
|
||||
// with a very small blocking size:
|
||||
Index ret = blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
|
||||
row_transpositions+k, nb_transpositions_in_panel, 16);
|
||||
if(ret>=0 && first_zero_pivot==-1)
|
||||
first_zero_pivot = k+ret;
|
||||
|
||||
nb_transpositions += nb_transpositions_in_panel;
|
||||
// update permutations and apply them to A_0
|
||||
for(Index i=k; i<k+bs; ++i)
|
||||
{
|
||||
Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k));
|
||||
A_0.row(i).swap(A_0.row(piv));
|
||||
}
|
||||
|
||||
if(trows)
|
||||
{
|
||||
// apply permutations to A_2
|
||||
for(Index i=k;i<k+bs; ++i)
|
||||
A_2.row(i).swap(A_2.row(row_transpositions[i]));
|
||||
|
||||
// A12 = A11^-1 A12
|
||||
A11.template triangularView<UnitLower>().solveInPlace(A12);
|
||||
|
||||
A22.noalias() -= A21 * A12;
|
||||
}
|
||||
}
|
||||
return first_zero_pivot;
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal performs the LU decomposition with partial pivoting in-place.
|
||||
*/
|
||||
template<typename MatrixType, typename TranspositionType>
|
||||
void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions)
|
||||
{
|
||||
eigen_assert(lu.cols() == row_transpositions.size());
|
||||
eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
|
||||
|
||||
partial_lu_impl
|
||||
<typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::StorageIndex>
|
||||
::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename MatrixType>
|
||||
void PartialPivLU<MatrixType>::compute()
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
// the row permutation is stored as int indices, so just to be sure:
|
||||
eigen_assert(m_lu.rows()<NumTraits<int>::highest());
|
||||
|
||||
m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
|
||||
|
||||
eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
|
||||
const Index size = m_lu.rows();
|
||||
|
||||
m_rowsTranspositions.resize(size);
|
||||
|
||||
typename TranspositionType::StorageIndex nb_transpositions;
|
||||
internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions);
|
||||
m_det_p = (nb_transpositions%2) ? -1 : 1;
|
||||
|
||||
m_p = m_rowsTranspositions;
|
||||
|
||||
m_isInitialized = true;
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
typename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
|
||||
return Scalar(m_det_p) * m_lu.diagonal().prod();
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
* i.e., it returns the product: P^{-1} L U.
|
||||
* This function is provided for debug purposes. */
|
||||
template<typename MatrixType>
|
||||
MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LU is not initialized.");
|
||||
// LU
|
||||
MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
|
||||
* m_lu.template triangularView<Upper>();
|
||||
|
||||
// P^{-1}(LU)
|
||||
res = m_p.inverse() * res;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/***** Implementation details *****************************************************/
|
||||
|
||||
namespace internal {
|
||||
|
||||
/***** Implementation of inverse() *****************************************************/
|
||||
template<typename DstXprType, typename MatrixType>
|
||||
struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef PartialPivLU<MatrixType> LuType;
|
||||
typedef Inverse<LuType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
|
||||
{
|
||||
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
|
||||
}
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
/******** MatrixBase methods *******/
|
||||
|
||||
/** \lu_module
|
||||
*
|
||||
* \return the partial-pivoting LU decomposition of \c *this.
|
||||
*
|
||||
* \sa class PartialPivLU
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
|
||||
MatrixBase<Derived>::partialPivLu() const
|
||||
{
|
||||
return PartialPivLU<PlainObject>(eval());
|
||||
}
|
||||
|
||||
/** \lu_module
|
||||
*
|
||||
* Synonym of partialPivLu().
|
||||
*
|
||||
* \return the partial-pivoting LU decomposition of \c *this.
|
||||
*
|
||||
* \sa class PartialPivLU
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
|
||||
MatrixBase<Derived>::lu() const
|
||||
{
|
||||
return PartialPivLU<PlainObject>(eval());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PARTIALLU_H
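
A short usage sketch for the decomposition defined above, exercising compute/solve/determinant/reconstructedMatrix; the 3x3 system shown is an illustrative assumption.

// Sketch: factor once with partial-pivoting LU, then reuse the factorization.
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3d A;
  A <<  2, -1,  0,
       -1,  2, -1,
        0, -1,  2;                        // square and invertible, as required
  const Eigen::Vector3d b(1.0, 2.0, 3.0);

  Eigen::PartialPivLU<Eigen::Matrix3d> lu(A);
  const Eigen::Vector3d x = lu.solve(b);  // solves A x = b via P A = L U

  std::cout << "x        = " << x.transpose() << "\n";
  std::cout << "det(A)   = " << lu.determinant() << "\n";
  std::cout << "residual = " << (A - lu.reconstructedMatrix()).norm() << "\n";
  return 0;
}
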
83
examples/ThirdPartyLibs/Eigen/src/LU/PartialPivLU_LAPACKE.h
Normal file
@@ -0,0 +1,83 @@
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to LAPACKe
|
||||
* LU decomposition with partial pivoting based on LAPACKE_?getrf function.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_PARTIALLU_LAPACK_H
|
||||
#define EIGEN_PARTIALLU_LAPACK_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal Specialization for the data types supported by LAPACKe */
|
||||
|
||||
#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \
|
||||
template<int StorageOrder> \
|
||||
struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \
|
||||
{ \
|
||||
/* \internal performs the LU decomposition in-place of the matrix represented */ \
|
||||
static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \
|
||||
{ \
|
||||
EIGEN_UNUSED_VARIABLE(maxBlockSize);\
|
||||
lapack_int matrix_order, first_zero_pivot; \
|
||||
lapack_int m, n, lda, *ipiv, info; \
|
||||
EIGTYPE* a; \
|
||||
/* Set up parameters for ?getrf */ \
|
||||
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
|
||||
lda = convert_index<lapack_int>(luStride); \
|
||||
a = lu_data; \
|
||||
ipiv = row_transpositions; \
|
||||
m = convert_index<lapack_int>(rows); \
|
||||
n = convert_index<lapack_int>(cols); \
|
||||
nb_transpositions = 0; \
|
||||
\
|
||||
info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \
|
||||
\
|
||||
for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \
|
||||
\
|
||||
eigen_assert(info >= 0); \
|
||||
/* something should be done with nb_transpositions */ \
|
||||
\
|
||||
first_zero_pivot = info; \
|
||||
return first_zero_pivot; \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_LAPACKE_LU_PARTPIV(double, double, d)
|
||||
EIGEN_LAPACKE_LU_PARTPIV(float, float, s)
|
||||
EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z)
|
||||
EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float, c)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PARTIALLU_LAPACK_H
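
User code does not change when this LAPACKE specialization is active; it is selected by building Eigen with its LAPACKE support enabled. A hedged sketch follows: the EIGEN_USE_LAPACKE define is standard Eigen, while the exact link flags are an assumption that depends on the local LAPACK/LAPACKE installation.

// Sketch: with EIGEN_USE_LAPACKE defined before including Eigen, the
// partial_lu_impl specialization above routes the factorization to
// LAPACKE_?getrf; the calling code is unchanged.
#define EIGEN_USE_LAPACKE
#include <Eigen/Dense>

Eigen::MatrixXd solveDense(const Eigen::MatrixXd& A, const Eigen::MatrixXd& B)
{
  return A.partialPivLu().solve(B);
}
// Possible build line (assumption): g++ -O2 app.cpp -llapacke -llapack -lblas
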
338
examples/ThirdPartyLibs/Eigen/src/LU/arch/Inverse_SSE.h
Normal file
@@ -0,0 +1,338 @@
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2001 Intel Corporation
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
// The SSE code for the 4x4 float and double matrix inverse in this file
|
||||
// comes from the following Intel library:
|
||||
// http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/
|
||||
//
|
||||
// Here is the respective copyright and license statement:
|
||||
//
|
||||
// Copyright (c) 2001 Intel Corporation.
|
||||
//
|
||||
// Permition is granted to use, copy, distribute and prepare derivative works
|
||||
// of this library for any purpose and without fee, provided, that the above
|
||||
// copyright notice and this statement appear in all copies.
|
||||
// Intel makes no representations about the suitability of this software for
|
||||
// any purpose, and specifically disclaims all warranties.
|
||||
// See LEGAL.TXT for all the legal information.
|
||||
|
||||
#ifndef EIGEN_INVERSE_SSE_H
|
||||
#define EIGEN_INVERSE_SSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
|
||||
{
|
||||
enum {
|
||||
MatrixAlignment = traits<MatrixType>::Alignment,
|
||||
ResultAlignment = traits<ResultType>::Alignment,
|
||||
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
|
||||
};
|
||||
typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
|
||||
|
||||
static void run(const MatrixType& mat, ResultType& result)
|
||||
{
|
||||
ActualMatrixType matrix(mat);
|
||||
EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
|
||||
|
||||
// Load the full matrix into registers
|
||||
__m128 _L1 = matrix.template packet<MatrixAlignment>( 0);
|
||||
__m128 _L2 = matrix.template packet<MatrixAlignment>( 4);
|
||||
__m128 _L3 = matrix.template packet<MatrixAlignment>( 8);
|
||||
__m128 _L4 = matrix.template packet<MatrixAlignment>(12);
|
||||
|
||||
// The inverse is calculated using the "Divide and Conquer" technique. The
// original matrix is divided into four 2x2 sub-matrices. Since each
// register holds four matrix elements, the smaller matrices are
// represented as single registers. Hence we get a better locality of the
// calculations.
|
||||
|
||||
__m128 A, B, C, D; // the four sub-matrices
|
||||
if(!StorageOrdersMatch)
|
||||
{
|
||||
A = _mm_unpacklo_ps(_L1, _L2);
|
||||
B = _mm_unpacklo_ps(_L3, _L4);
|
||||
C = _mm_unpackhi_ps(_L1, _L2);
|
||||
D = _mm_unpackhi_ps(_L3, _L4);
|
||||
}
|
||||
else
|
||||
{
|
||||
A = _mm_movelh_ps(_L1, _L2);
|
||||
B = _mm_movehl_ps(_L2, _L1);
|
||||
C = _mm_movelh_ps(_L3, _L4);
|
||||
D = _mm_movehl_ps(_L4, _L3);
|
||||
}
|
||||
|
||||
__m128 iA, iB, iC, iD, // partial inverse of the sub-matrices
|
||||
DC, AB;
|
||||
__m128 dA, dB, dC, dD; // determinant of the sub-matrices
|
||||
__m128 det, d, d1, d2;
|
||||
__m128 rd; // reciprocal of the determinant
|
||||
|
||||
// AB = A# * B
|
||||
AB = _mm_mul_ps(_mm_shuffle_ps(A,A,0x0F), B);
|
||||
AB = _mm_sub_ps(AB,_mm_mul_ps(_mm_shuffle_ps(A,A,0xA5), _mm_shuffle_ps(B,B,0x4E)));
|
||||
// DC = D# * C
|
||||
DC = _mm_mul_ps(_mm_shuffle_ps(D,D,0x0F), C);
|
||||
DC = _mm_sub_ps(DC,_mm_mul_ps(_mm_shuffle_ps(D,D,0xA5), _mm_shuffle_ps(C,C,0x4E)));
|
||||
|
||||
// dA = |A|
|
||||
dA = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x5F),A);
|
||||
dA = _mm_sub_ss(dA, _mm_movehl_ps(dA,dA));
|
||||
// dB = |B|
|
||||
dB = _mm_mul_ps(_mm_shuffle_ps(B, B, 0x5F),B);
|
||||
dB = _mm_sub_ss(dB, _mm_movehl_ps(dB,dB));
|
||||
|
||||
// dC = |C|
|
||||
dC = _mm_mul_ps(_mm_shuffle_ps(C, C, 0x5F),C);
|
||||
dC = _mm_sub_ss(dC, _mm_movehl_ps(dC,dC));
|
||||
// dD = |D|
|
||||
dD = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x5F),D);
|
||||
dD = _mm_sub_ss(dD, _mm_movehl_ps(dD,dD));
|
||||
|
||||
// d = trace(AB*DC) = trace(A#*B*D#*C)
|
||||
d = _mm_mul_ps(_mm_shuffle_ps(DC,DC,0xD8),AB);
|
||||
|
||||
// iD = C*A#*B
|
||||
iD = _mm_mul_ps(_mm_shuffle_ps(C,C,0xA0), _mm_movelh_ps(AB,AB));
|
||||
iD = _mm_add_ps(iD,_mm_mul_ps(_mm_shuffle_ps(C,C,0xF5), _mm_movehl_ps(AB,AB)));
|
||||
// iA = B*D#*C
|
||||
iA = _mm_mul_ps(_mm_shuffle_ps(B,B,0xA0), _mm_movelh_ps(DC,DC));
|
||||
iA = _mm_add_ps(iA,_mm_mul_ps(_mm_shuffle_ps(B,B,0xF5), _mm_movehl_ps(DC,DC)));
|
||||
|
||||
// d = trace(AB*DC) = trace(A#*B*D#*C) [continue]
|
||||
d = _mm_add_ps(d, _mm_movehl_ps(d, d));
|
||||
d = _mm_add_ss(d, _mm_shuffle_ps(d, d, 1));
|
||||
d1 = _mm_mul_ss(dA,dD);
|
||||
d2 = _mm_mul_ss(dB,dC);
|
||||
|
||||
// iD = D*|A| - C*A#*B
|
||||
iD = _mm_sub_ps(_mm_mul_ps(D,_mm_shuffle_ps(dA,dA,0)), iD);
|
||||
|
||||
// iA = A*|D| - B*D#*C;
|
||||
iA = _mm_sub_ps(_mm_mul_ps(A,_mm_shuffle_ps(dD,dD,0)), iA);
|
||||
|
||||
// det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C)
|
||||
det = _mm_sub_ss(_mm_add_ss(d1,d2),d);
|
||||
rd = _mm_div_ss(_mm_set_ss(1.0f), det);
|
||||
|
||||
// #ifdef ZERO_SINGULAR
|
||||
// rd = _mm_and_ps(_mm_cmpneq_ss(det,_mm_setzero_ps()), rd);
|
||||
// #endif
|
||||
|
||||
// iB = D * (A#B)# = D*B#*A
|
||||
iB = _mm_mul_ps(D, _mm_shuffle_ps(AB,AB,0x33));
|
||||
iB = _mm_sub_ps(iB, _mm_mul_ps(_mm_shuffle_ps(D,D,0xB1), _mm_shuffle_ps(AB,AB,0x66)));
|
||||
// iC = A * (D#C)# = A*C#*D
|
||||
iC = _mm_mul_ps(A, _mm_shuffle_ps(DC,DC,0x33));
|
||||
iC = _mm_sub_ps(iC, _mm_mul_ps(_mm_shuffle_ps(A,A,0xB1), _mm_shuffle_ps(DC,DC,0x66)));
|
||||
|
||||
rd = _mm_shuffle_ps(rd,rd,0);
|
||||
rd = _mm_xor_ps(rd, _mm_load_ps((float*)_Sign_PNNP));
|
||||
|
||||
// iB = C*|B| - D*B#*A
|
||||
iB = _mm_sub_ps(_mm_mul_ps(C,_mm_shuffle_ps(dB,dB,0)), iB);
|
||||
|
||||
// iC = B*|C| - A*C#*D;
|
||||
iC = _mm_sub_ps(_mm_mul_ps(B,_mm_shuffle_ps(dC,dC,0)), iC);
|
||||
|
||||
// iX = iX / det
|
||||
iA = _mm_mul_ps(rd,iA);
|
||||
iB = _mm_mul_ps(rd,iB);
|
||||
iC = _mm_mul_ps(rd,iC);
|
||||
iD = _mm_mul_ps(rd,iD);
|
||||
|
||||
Index res_stride = result.outerStride();
|
||||
float* res = result.data();
|
||||
pstoret<float, Packet4f, ResultAlignment>(res+0, _mm_shuffle_ps(iA,iB,0x77));
|
||||
pstoret<float, Packet4f, ResultAlignment>(res+res_stride, _mm_shuffle_ps(iA,iB,0x22));
|
||||
pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77));
|
||||
pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename MatrixType, typename ResultType>
|
||||
struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
|
||||
{
|
||||
enum {
|
||||
MatrixAlignment = traits<MatrixType>::Alignment,
|
||||
ResultAlignment = traits<ResultType>::Alignment,
|
||||
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
|
||||
};
|
||||
typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
|
||||
|
||||
static void run(const MatrixType& mat, ResultType& result)
|
||||
{
|
||||
ActualMatrixType matrix(mat);
|
||||
const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
|
||||
const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
|
||||
// The inverse is calculated using the "Divide and Conquer" technique. The
// original matrix is divided into four 2x2 sub-matrices. Since each
// register of the matrix holds two elements, the smaller matrices each
// consist of two registers. Hence we get a better locality of the
// calculations.
|
||||
|
||||
// the four sub-matrices
|
||||
__m128d A1, A2, B1, B2, C1, C2, D1, D2;
|
||||
|
||||
if(StorageOrdersMatch)
|
||||
{
|
||||
A1 = matrix.template packet<MatrixAlignment>( 0); B1 = matrix.template packet<MatrixAlignment>( 2);
|
||||
A2 = matrix.template packet<MatrixAlignment>( 4); B2 = matrix.template packet<MatrixAlignment>( 6);
|
||||
C1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10);
|
||||
C2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14);
|
||||
}
|
||||
else
|
||||
{
|
||||
__m128d tmp;
|
||||
A1 = matrix.template packet<MatrixAlignment>( 0); C1 = matrix.template packet<MatrixAlignment>( 2);
|
||||
A2 = matrix.template packet<MatrixAlignment>( 4); C2 = matrix.template packet<MatrixAlignment>( 6);
|
||||
tmp = A1;
|
||||
A1 = _mm_unpacklo_pd(A1,A2);
|
||||
A2 = _mm_unpackhi_pd(tmp,A2);
|
||||
tmp = C1;
|
||||
C1 = _mm_unpacklo_pd(C1,C2);
|
||||
C2 = _mm_unpackhi_pd(tmp,C2);
|
||||
|
||||
B1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10);
|
||||
B2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14);
|
||||
tmp = B1;
|
||||
B1 = _mm_unpacklo_pd(B1,B2);
|
||||
B2 = _mm_unpackhi_pd(tmp,B2);
|
||||
tmp = D1;
|
||||
D1 = _mm_unpacklo_pd(D1,D2);
|
||||
D2 = _mm_unpackhi_pd(tmp,D2);
|
||||
}
|
||||
|
||||
__m128d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2, // partial inverse of the sub-matrices
|
||||
DC1, DC2, AB1, AB2;
|
||||
__m128d dA, dB, dC, dD; // determinant of the sub-matrices
|
||||
__m128d det, d1, d2, rd;
|
||||
|
||||
// dA = |A|
|
||||
dA = _mm_shuffle_pd(A2, A2, 1);
|
||||
dA = _mm_mul_pd(A1, dA);
|
||||
dA = _mm_sub_sd(dA, _mm_shuffle_pd(dA,dA,3));
|
||||
// dB = |B|
|
||||
dB = _mm_shuffle_pd(B2, B2, 1);
|
||||
dB = _mm_mul_pd(B1, dB);
|
||||
dB = _mm_sub_sd(dB, _mm_shuffle_pd(dB,dB,3));
|
||||
|
||||
// AB = A# * B
|
||||
AB1 = _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,3));
|
||||
AB2 = _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,0));
|
||||
AB1 = _mm_sub_pd(AB1, _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,3)));
|
||||
AB2 = _mm_sub_pd(AB2, _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,0)));
|
||||
|
||||
// dC = |C|
|
||||
dC = _mm_shuffle_pd(C2, C2, 1);
|
||||
dC = _mm_mul_pd(C1, dC);
|
||||
dC = _mm_sub_sd(dC, _mm_shuffle_pd(dC,dC,3));
|
||||
// dD = |D|
|
||||
dD = _mm_shuffle_pd(D2, D2, 1);
|
||||
dD = _mm_mul_pd(D1, dD);
|
||||
dD = _mm_sub_sd(dD, _mm_shuffle_pd(dD,dD,3));
|
||||
|
||||
// DC = D# * C
|
||||
DC1 = _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,3));
|
||||
DC2 = _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,0));
|
||||
DC1 = _mm_sub_pd(DC1, _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,3)));
|
||||
DC2 = _mm_sub_pd(DC2, _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,0)));
|
||||
|
||||
// rd = trace(AB*DC) = trace(A#*B*D#*C)
|
||||
d1 = _mm_mul_pd(AB1, _mm_shuffle_pd(DC1, DC2, 0));
|
||||
d2 = _mm_mul_pd(AB2, _mm_shuffle_pd(DC1, DC2, 3));
|
||||
rd = _mm_add_pd(d1, d2);
|
||||
rd = _mm_add_sd(rd, _mm_shuffle_pd(rd, rd,3));
|
||||
|
||||
// iD = C*A#*B
|
||||
iD1 = _mm_mul_pd(AB1, _mm_shuffle_pd(C1,C1,0));
|
||||
iD2 = _mm_mul_pd(AB1, _mm_shuffle_pd(C2,C2,0));
|
||||
iD1 = _mm_add_pd(iD1, _mm_mul_pd(AB2, _mm_shuffle_pd(C1,C1,3)));
|
||||
iD2 = _mm_add_pd(iD2, _mm_mul_pd(AB2, _mm_shuffle_pd(C2,C2,3)));
|
||||
|
||||
// iA = B*D#*C
|
||||
iA1 = _mm_mul_pd(DC1, _mm_shuffle_pd(B1,B1,0));
|
||||
iA2 = _mm_mul_pd(DC1, _mm_shuffle_pd(B2,B2,0));
|
||||
iA1 = _mm_add_pd(iA1, _mm_mul_pd(DC2, _mm_shuffle_pd(B1,B1,3)));
|
||||
iA2 = _mm_add_pd(iA2, _mm_mul_pd(DC2, _mm_shuffle_pd(B2,B2,3)));
|
||||
|
||||
// iD = D*|A| - C*A#*B
|
||||
dA = _mm_shuffle_pd(dA,dA,0);
|
||||
iD1 = _mm_sub_pd(_mm_mul_pd(D1, dA), iD1);
|
||||
iD2 = _mm_sub_pd(_mm_mul_pd(D2, dA), iD2);
|
||||
|
||||
// iA = A*|D| - B*D#*C;
|
||||
dD = _mm_shuffle_pd(dD,dD,0);
|
||||
iA1 = _mm_sub_pd(_mm_mul_pd(A1, dD), iA1);
|
||||
iA2 = _mm_sub_pd(_mm_mul_pd(A2, dD), iA2);
|
||||
|
||||
d1 = _mm_mul_sd(dA, dD);
|
||||
d2 = _mm_mul_sd(dB, dC);
|
||||
|
||||
// iB = D * (A#B)# = D*B#*A
|
||||
iB1 = _mm_mul_pd(D1, _mm_shuffle_pd(AB2,AB1,1));
|
||||
iB2 = _mm_mul_pd(D2, _mm_shuffle_pd(AB2,AB1,1));
|
||||
iB1 = _mm_sub_pd(iB1, _mm_mul_pd(_mm_shuffle_pd(D1,D1,1), _mm_shuffle_pd(AB2,AB1,2)));
|
||||
iB2 = _mm_sub_pd(iB2, _mm_mul_pd(_mm_shuffle_pd(D2,D2,1), _mm_shuffle_pd(AB2,AB1,2)));
|
||||
|
||||
// det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C)
|
||||
det = _mm_add_sd(d1, d2);
|
||||
det = _mm_sub_sd(det, rd);
|
||||
|
||||
// iC = A * (D#C)# = A*C#*D
|
||||
iC1 = _mm_mul_pd(A1, _mm_shuffle_pd(DC2,DC1,1));
|
||||
iC2 = _mm_mul_pd(A2, _mm_shuffle_pd(DC2,DC1,1));
|
||||
iC1 = _mm_sub_pd(iC1, _mm_mul_pd(_mm_shuffle_pd(A1,A1,1), _mm_shuffle_pd(DC2,DC1,2)));
|
||||
iC2 = _mm_sub_pd(iC2, _mm_mul_pd(_mm_shuffle_pd(A2,A2,1), _mm_shuffle_pd(DC2,DC1,2)));
|
||||
|
||||
rd = _mm_div_sd(_mm_set_sd(1.0), det);
|
||||
// #ifdef ZERO_SINGULAR
|
||||
// rd = _mm_and_pd(_mm_cmpneq_sd(det,_mm_setzero_pd()), rd);
|
||||
// #endif
|
||||
rd = _mm_shuffle_pd(rd,rd,0);
|
||||
|
||||
// iB = C*|B| - D*B#*A
|
||||
dB = _mm_shuffle_pd(dB,dB,0);
|
||||
iB1 = _mm_sub_pd(_mm_mul_pd(C1, dB), iB1);
|
||||
iB2 = _mm_sub_pd(_mm_mul_pd(C2, dB), iB2);
|
||||
|
||||
d1 = _mm_xor_pd(rd, _Sign_PN);
|
||||
d2 = _mm_xor_pd(rd, _Sign_NP);
|
||||
|
||||
// iC = B*|C| - A*C#*D;
|
||||
dC = _mm_shuffle_pd(dC,dC,0);
|
||||
iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
|
||||
iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);
|
||||
|
||||
Index res_stride = result.outerStride();
|
||||
double* res = result.data();
|
||||
pstoret<double, Packet2d, ResultAlignment>(res+0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));
|
||||
pstoret<double, Packet2d, ResultAlignment>(res+res_stride, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
|
||||
pstoret<double, Packet2d, ResultAlignment>(res+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));
|
||||
pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
|
||||
pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));
|
||||
pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
|
||||
pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));
|
||||
pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_INVERSE_SSE_H
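
The specializations above are not called directly; Matrix4f::inverse() (and the double variant) dispatch to compute_inverse_size4 and pick up the SSE kernels automatically on SSE-enabled builds. A minimal sketch, with illustrative values:

// Sketch: a 4x4 fixed-size inverse that exercises the SSE path when available.
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix4f m = Eigen::Matrix4f::Random();
  m += 4.0f * Eigen::Matrix4f::Identity();   // keep it well conditioned

  const Eigen::Matrix4f inv = m.inverse();
  std::cout << "||m*inv - I|| = "
            << (m * inv - Eigen::Matrix4f::Identity()).norm() << "\n";
  return 0;
}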