Add preliminary PhysX 4.0 backend for PyBullet
Add inverse dynamics / mass matrix code from DeepMimic, thanks to Xue Bin (Jason) Peng Add example how to use stable PD control for humanoid with spherical joints (see humanoidMotionCapture.py) Fix related to TinyRenderer object transforms not updating when using collision filtering
This commit is contained in:
775
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU.h
Normal file
775
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU.h
Normal file
@@ -0,0 +1,775 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
|
||||
#ifndef EIGEN_SPARSE_LU_H
|
||||
#define EIGEN_SPARSE_LU_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::StorageIndex> > class SparseLU;
|
||||
template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;
|
||||
template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType;
|
||||
|
||||
/** \ingroup SparseLU_Module
|
||||
* \class SparseLU
|
||||
*
|
||||
* \brief Sparse supernodal LU factorization for general matrices
|
||||
*
|
||||
* This class implements the supernodal LU factorization for general matrices.
|
||||
* It uses the main techniques from the sequential SuperLU package
|
||||
* (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real
|
||||
* and complex arithmetics with single and double precision, depending on the
|
||||
* scalar type of your input matrix.
|
||||
* The code has been optimized to provide BLAS-3 operations during supernode-panel updates.
|
||||
* It benefits directly from the built-in high-performant Eigen BLAS routines.
|
||||
* Moreover, when the size of a supernode is very small, the BLAS calls are avoided to
|
||||
* enable a better optimization from the compiler. For best performance,
|
||||
* you should compile it with NDEBUG flag to avoid the numerous bounds checking on vectors.
|
||||
*
|
||||
* An important parameter of this class is the ordering method. It is used to reorder the columns
|
||||
* (and eventually the rows) of the matrix to reduce the number of new elements that are created during
|
||||
* numerical factorization. The cheapest method available is COLAMD.
|
||||
* See \link OrderingMethods_Module the OrderingMethods module \endlink for the list of
|
||||
* built-in and external ordering methods.
|
||||
*
|
||||
* Simple example with key steps
|
||||
* \code
|
||||
* VectorXd x(n), b(n);
|
||||
* SparseMatrix<double, ColMajor> A;
|
||||
* SparseLU<SparseMatrix<scalar, ColMajor>, COLAMDOrdering<Index> > solver;
|
||||
* // fill A and b;
|
||||
* // Compute the ordering permutation vector from the structural pattern of A
|
||||
* solver.analyzePattern(A);
|
||||
* // Compute the numerical factorization
|
||||
* solver.factorize(A);
|
||||
* //Use the factors to solve the linear system
|
||||
* x = solver.solve(b);
|
||||
* \endcode
|
||||
*
|
||||
* \warning The input matrix A should be in a \b compressed and \b column-major form.
|
||||
* Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.
|
||||
*
|
||||
* \note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix.
|
||||
* For badly scaled matrices, this step can be useful to reduce the pivoting during factorization.
|
||||
* If this is the case for your matrices, you can try the basic scaling method at
|
||||
* "unsupported/Eigen/src/IterativeSolvers/Scaling.h"
|
||||
*
|
||||
* \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<>
|
||||
* \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLMAD
|
||||
*
|
||||
* \implsparsesolverconcept
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept
|
||||
* \sa \ref OrderingMethods_Module
|
||||
*/
|
||||
template <typename _MatrixType, typename _OrderingType>
|
||||
class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::StorageIndex>
|
||||
{
|
||||
protected:
|
||||
typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;
|
||||
using APIBase::m_isInitialized;
|
||||
public:
|
||||
using APIBase::_solve_impl;
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
typedef _OrderingType OrderingType;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
typedef SparseMatrix<Scalar,ColMajor,StorageIndex> NCMatrix;
|
||||
typedef internal::MappedSuperNodalMatrix<Scalar, StorageIndex> SCMatrix;
|
||||
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
|
||||
typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
|
||||
typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;
|
||||
typedef internal::SparseLUImpl<Scalar, StorageIndex> Base;
|
||||
|
||||
enum {
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
||||
};
|
||||
|
||||
public:
|
||||
SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
|
||||
{
|
||||
initperfvalues();
|
||||
}
|
||||
explicit SparseLU(const MatrixType& matrix)
|
||||
: m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
|
||||
{
|
||||
initperfvalues();
|
||||
compute(matrix);
|
||||
}
|
||||
|
||||
~SparseLU()
|
||||
{
|
||||
// Free all explicit dynamic pointers
|
||||
}
|
||||
|
||||
void analyzePattern (const MatrixType& matrix);
|
||||
void factorize (const MatrixType& matrix);
|
||||
void simplicialfactorize(const MatrixType& matrix);
|
||||
|
||||
/**
|
||||
* Compute the symbolic and numeric factorization of the input sparse matrix.
|
||||
* The input matrix should be in column-major storage.
|
||||
*/
|
||||
void compute (const MatrixType& matrix)
|
||||
{
|
||||
// Analyze
|
||||
analyzePattern(matrix);
|
||||
//Factorize
|
||||
factorize(matrix);
|
||||
}
|
||||
|
||||
inline Index rows() const { return m_mat.rows(); }
|
||||
inline Index cols() const { return m_mat.cols(); }
|
||||
/** Indicate that the pattern of the input matrix is symmetric */
|
||||
void isSymmetric(bool sym)
|
||||
{
|
||||
m_symmetricmode = sym;
|
||||
}
|
||||
|
||||
/** \returns an expression of the matrix L, internally stored as supernodes
|
||||
* The only operation available with this expression is the triangular solve
|
||||
* \code
|
||||
* y = b; matrixL().solveInPlace(y);
|
||||
* \endcode
|
||||
*/
|
||||
SparseLUMatrixLReturnType<SCMatrix> matrixL() const
|
||||
{
|
||||
return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore);
|
||||
}
|
||||
/** \returns an expression of the matrix U,
|
||||
* The only operation available with this expression is the triangular solve
|
||||
* \code
|
||||
* y = b; matrixU().solveInPlace(y);
|
||||
* \endcode
|
||||
*/
|
||||
SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,StorageIndex> > matrixU() const
|
||||
{
|
||||
return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,StorageIndex> >(m_Lstore, m_Ustore);
|
||||
}
|
||||
|
||||
/**
|
||||
* \returns a reference to the row matrix permutation \f$ P_r \f$ such that \f$P_r A P_c^T = L U\f$
|
||||
* \sa colsPermutation()
|
||||
*/
|
||||
inline const PermutationType& rowsPermutation() const
|
||||
{
|
||||
return m_perm_r;
|
||||
}
|
||||
/**
|
||||
* \returns a reference to the column matrix permutation\f$ P_c^T \f$ such that \f$P_r A P_c^T = L U\f$
|
||||
* \sa rowsPermutation()
|
||||
*/
|
||||
inline const PermutationType& colsPermutation() const
|
||||
{
|
||||
return m_perm_c;
|
||||
}
|
||||
/** Set the threshold used for a diagonal entry to be an acceptable pivot. */
|
||||
void setPivotThreshold(const RealScalar& thresh)
|
||||
{
|
||||
m_diagpivotthresh = thresh;
|
||||
}
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
|
||||
*
|
||||
* \warning the destination matrix X in X = this->solve(B) must be colmun-major.
|
||||
*
|
||||
* \sa compute()
|
||||
*/
|
||||
template<typename Rhs>
|
||||
inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
/** \brief Reports whether previous computation was successful.
|
||||
*
|
||||
* \returns \c Success if computation was succesful,
|
||||
* \c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance
|
||||
* \c InvalidInput if the input matrix is invalid
|
||||
*
|
||||
* \sa iparm()
|
||||
*/
|
||||
ComputationInfo info() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
|
||||
return m_info;
|
||||
}
|
||||
|
||||
/**
|
||||
* \returns A string describing the type of error
|
||||
*/
|
||||
std::string lastErrorMessage() const
|
||||
{
|
||||
return m_lastError;
|
||||
}
|
||||
|
||||
template<typename Rhs, typename Dest>
|
||||
bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
|
||||
{
|
||||
Dest& X(X_base.derived());
|
||||
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
|
||||
EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
|
||||
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
|
||||
|
||||
// Permute the right hand side to form X = Pr*B
|
||||
// on return, X is overwritten by the computed solution
|
||||
X.resize(B.rows(),B.cols());
|
||||
|
||||
// this ugly const_cast_derived() helps to detect aliasing when applying the permutations
|
||||
for(Index j = 0; j < B.cols(); ++j)
|
||||
X.col(j) = rowsPermutation() * B.const_cast_derived().col(j);
|
||||
|
||||
//Forward substitution with L
|
||||
this->matrixL().solveInPlace(X);
|
||||
this->matrixU().solveInPlace(X);
|
||||
|
||||
// Permute back the solution
|
||||
for (Index j = 0; j < B.cols(); ++j)
|
||||
X.col(j) = colsPermutation().inverse() * X.col(j);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* \returns the absolute value of the determinant of the matrix of which
|
||||
* *this is the QR decomposition.
|
||||
*
|
||||
* \warning a determinant can be very big or small, so for matrices
|
||||
* of large enough dimension, there is a risk of overflow/underflow.
|
||||
* One way to work around that is to use logAbsDeterminant() instead.
|
||||
*
|
||||
* \sa logAbsDeterminant(), signDeterminant()
|
||||
*/
|
||||
Scalar absDeterminant()
|
||||
{
|
||||
using std::abs;
|
||||
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
|
||||
// Initialize with the determinant of the row matrix
|
||||
Scalar det = Scalar(1.);
|
||||
// Note that the diagonal blocks of U are stored in supernodes,
|
||||
// which are available in the L part :)
|
||||
for (Index j = 0; j < this->cols(); ++j)
|
||||
{
|
||||
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
|
||||
{
|
||||
if(it.index() == j)
|
||||
{
|
||||
det *= abs(it.value());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return det;
|
||||
}
|
||||
|
||||
/** \returns the natural log of the absolute value of the determinant of the matrix
|
||||
* of which **this is the QR decomposition
|
||||
*
|
||||
* \note This method is useful to work around the risk of overflow/underflow that's
|
||||
* inherent to the determinant computation.
|
||||
*
|
||||
* \sa absDeterminant(), signDeterminant()
|
||||
*/
|
||||
Scalar logAbsDeterminant() const
|
||||
{
|
||||
using std::log;
|
||||
using std::abs;
|
||||
|
||||
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
|
||||
Scalar det = Scalar(0.);
|
||||
for (Index j = 0; j < this->cols(); ++j)
|
||||
{
|
||||
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
|
||||
{
|
||||
if(it.row() < j) continue;
|
||||
if(it.row() == j)
|
||||
{
|
||||
det += log(abs(it.value()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return det;
|
||||
}
|
||||
|
||||
/** \returns A number representing the sign of the determinant
|
||||
*
|
||||
* \sa absDeterminant(), logAbsDeterminant()
|
||||
*/
|
||||
Scalar signDeterminant()
|
||||
{
|
||||
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
|
||||
// Initialize with the determinant of the row matrix
|
||||
Index det = 1;
|
||||
// Note that the diagonal blocks of U are stored in supernodes,
|
||||
// which are available in the L part :)
|
||||
for (Index j = 0; j < this->cols(); ++j)
|
||||
{
|
||||
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
|
||||
{
|
||||
if(it.index() == j)
|
||||
{
|
||||
if(it.value()<0)
|
||||
det = -det;
|
||||
else if(it.value()==0)
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return det * m_detPermR * m_detPermC;
|
||||
}
|
||||
|
||||
/** \returns The determinant of the matrix.
|
||||
*
|
||||
* \sa absDeterminant(), logAbsDeterminant()
|
||||
*/
|
||||
Scalar determinant()
|
||||
{
|
||||
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
|
||||
// Initialize with the determinant of the row matrix
|
||||
Scalar det = Scalar(1.);
|
||||
// Note that the diagonal blocks of U are stored in supernodes,
|
||||
// which are available in the L part :)
|
||||
for (Index j = 0; j < this->cols(); ++j)
|
||||
{
|
||||
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
|
||||
{
|
||||
if(it.index() == j)
|
||||
{
|
||||
det *= it.value();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return (m_detPermR * m_detPermC) > 0 ? det : -det;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Functions
|
||||
void initperfvalues()
|
||||
{
|
||||
m_perfv.panel_size = 16;
|
||||
m_perfv.relax = 1;
|
||||
m_perfv.maxsuper = 128;
|
||||
m_perfv.rowblk = 16;
|
||||
m_perfv.colblk = 8;
|
||||
m_perfv.fillfactor = 20;
|
||||
}
|
||||
|
||||
// Variables
|
||||
mutable ComputationInfo m_info;
|
||||
bool m_factorizationIsOk;
|
||||
bool m_analysisIsOk;
|
||||
std::string m_lastError;
|
||||
NCMatrix m_mat; // The input (permuted ) matrix
|
||||
SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
|
||||
MappedSparseMatrix<Scalar,ColMajor,StorageIndex> m_Ustore; // The upper triangular matrix
|
||||
PermutationType m_perm_c; // Column permutation
|
||||
PermutationType m_perm_r ; // Row permutation
|
||||
IndexVector m_etree; // Column elimination tree
|
||||
|
||||
typename Base::GlobalLU_t m_glu;
|
||||
|
||||
// SparseLU options
|
||||
bool m_symmetricmode;
|
||||
// values for performance
|
||||
internal::perfvalues m_perfv;
|
||||
RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot
|
||||
Index m_nnzL, m_nnzU; // Nonzeros in L and U factors
|
||||
Index m_detPermR, m_detPermC; // Determinants of the permutation matrices
|
||||
private:
|
||||
// Disable copy constructor
|
||||
SparseLU (const SparseLU& );
|
||||
|
||||
}; // End class SparseLU
|
||||
|
||||
|
||||
|
||||
// Functions needed by the anaysis phase
|
||||
/**
|
||||
* Compute the column permutation to minimize the fill-in
|
||||
*
|
||||
* - Apply this permutation to the input matrix -
|
||||
*
|
||||
* - Compute the column elimination tree on the permuted matrix
|
||||
*
|
||||
* - Postorder the elimination tree and the column permutation
|
||||
*
|
||||
*/
|
||||
template <typename MatrixType, typename OrderingType>
|
||||
void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
|
||||
{
|
||||
|
||||
//TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat.
|
||||
|
||||
// Firstly, copy the whole input matrix.
|
||||
m_mat = mat;
|
||||
|
||||
// Compute fill-in ordering
|
||||
OrderingType ord;
|
||||
ord(m_mat,m_perm_c);
|
||||
|
||||
// Apply the permutation to the column of the input matrix
|
||||
if (m_perm_c.size())
|
||||
{
|
||||
m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.
|
||||
// Then, permute only the column pointers
|
||||
ei_declare_aligned_stack_constructed_variable(StorageIndex,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<StorageIndex*>(mat.outerIndexPtr()):0);
|
||||
|
||||
// If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed.
|
||||
if(!mat.isCompressed())
|
||||
IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1);
|
||||
|
||||
// Apply the permutation and compute the nnz per column.
|
||||
for (Index i = 0; i < mat.cols(); i++)
|
||||
{
|
||||
m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
|
||||
m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the column elimination tree of the permuted matrix
|
||||
IndexVector firstRowElt;
|
||||
internal::coletree(m_mat, m_etree,firstRowElt);
|
||||
|
||||
// In symmetric mode, do not do postorder here
|
||||
if (!m_symmetricmode) {
|
||||
IndexVector post, iwork;
|
||||
// Post order etree
|
||||
internal::treePostorder(StorageIndex(m_mat.cols()), m_etree, post);
|
||||
|
||||
|
||||
// Renumber etree in postorder
|
||||
Index m = m_mat.cols();
|
||||
iwork.resize(m+1);
|
||||
for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
|
||||
m_etree = iwork;
|
||||
|
||||
// Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree
|
||||
PermutationType post_perm(m);
|
||||
for (Index i = 0; i < m; i++)
|
||||
post_perm.indices()(i) = post(i);
|
||||
|
||||
// Combine the two permutations : postorder the permutation for future use
|
||||
if(m_perm_c.size()) {
|
||||
m_perm_c = post_perm * m_perm_c;
|
||||
}
|
||||
|
||||
} // end postordering
|
||||
|
||||
m_analysisIsOk = true;
|
||||
}
|
||||
|
||||
// Functions needed by the numerical factorization phase
|
||||
|
||||
|
||||
/**
|
||||
* - Numerical factorization
|
||||
* - Interleaved with the symbolic factorization
|
||||
* On exit, info is
|
||||
*
|
||||
* = 0: successful factorization
|
||||
*
|
||||
* > 0: if info = i, and i is
|
||||
*
|
||||
* <= A->ncol: U(i,i) is exactly zero. The factorization has
|
||||
* been completed, but the factor U is exactly singular,
|
||||
* and division by zero will occur if it is used to solve a
|
||||
* system of equations.
|
||||
*
|
||||
* > A->ncol: number of bytes allocated when memory allocation
|
||||
* failure occurred, plus A->ncol. If lwork = -1, it is
|
||||
* the estimated amount of space needed, plus A->ncol.
|
||||
*/
|
||||
template <typename MatrixType, typename OrderingType>
|
||||
void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
|
||||
{
|
||||
using internal::emptyIdxLU;
|
||||
eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
|
||||
eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices");
|
||||
|
||||
typedef typename IndexVector::Scalar StorageIndex;
|
||||
|
||||
m_isInitialized = true;
|
||||
|
||||
|
||||
// Apply the column permutation computed in analyzepattern()
|
||||
// m_mat = matrix * m_perm_c.inverse();
|
||||
m_mat = matrix;
|
||||
if (m_perm_c.size())
|
||||
{
|
||||
m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
|
||||
//Then, permute only the column pointers
|
||||
const StorageIndex * outerIndexPtr;
|
||||
if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr();
|
||||
else
|
||||
{
|
||||
StorageIndex* outerIndexPtr_t = new StorageIndex[matrix.cols()+1];
|
||||
for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];
|
||||
outerIndexPtr = outerIndexPtr_t;
|
||||
}
|
||||
for (Index i = 0; i < matrix.cols(); i++)
|
||||
{
|
||||
m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
|
||||
m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
|
||||
}
|
||||
if(!matrix.isCompressed()) delete[] outerIndexPtr;
|
||||
}
|
||||
else
|
||||
{ //FIXME This should not be needed if the empty permutation is handled transparently
|
||||
m_perm_c.resize(matrix.cols());
|
||||
for(StorageIndex i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
|
||||
}
|
||||
|
||||
Index m = m_mat.rows();
|
||||
Index n = m_mat.cols();
|
||||
Index nnz = m_mat.nonZeros();
|
||||
Index maxpanel = m_perfv.panel_size * m;
|
||||
// Allocate working storage common to the factor routines
|
||||
Index lwork = 0;
|
||||
Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu);
|
||||
if (info)
|
||||
{
|
||||
m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
|
||||
m_factorizationIsOk = false;
|
||||
return ;
|
||||
}
|
||||
|
||||
// Set up pointers for integer working arrays
|
||||
IndexVector segrep(m); segrep.setZero();
|
||||
IndexVector parent(m); parent.setZero();
|
||||
IndexVector xplore(m); xplore.setZero();
|
||||
IndexVector repfnz(maxpanel);
|
||||
IndexVector panel_lsub(maxpanel);
|
||||
IndexVector xprune(n); xprune.setZero();
|
||||
IndexVector marker(m*internal::LUNoMarker); marker.setZero();
|
||||
|
||||
repfnz.setConstant(-1);
|
||||
panel_lsub.setConstant(-1);
|
||||
|
||||
// Set up pointers for scalar working arrays
|
||||
ScalarVector dense;
|
||||
dense.setZero(maxpanel);
|
||||
ScalarVector tempv;
|
||||
tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
|
||||
|
||||
// Compute the inverse of perm_c
|
||||
PermutationType iperm_c(m_perm_c.inverse());
|
||||
|
||||
// Identify initial relaxed snodes
|
||||
IndexVector relax_end(n);
|
||||
if ( m_symmetricmode == true )
|
||||
Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
|
||||
else
|
||||
Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
|
||||
|
||||
|
||||
m_perm_r.resize(m);
|
||||
m_perm_r.indices().setConstant(-1);
|
||||
marker.setConstant(-1);
|
||||
m_detPermR = 1; // Record the determinant of the row permutation
|
||||
|
||||
m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0);
|
||||
m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0);
|
||||
|
||||
// Work on one 'panel' at a time. A panel is one of the following :
|
||||
// (a) a relaxed supernode at the bottom of the etree, or
|
||||
// (b) panel_size contiguous columns, <panel_size> defined by the user
|
||||
Index jcol;
|
||||
IndexVector panel_histo(n);
|
||||
Index pivrow; // Pivotal row number in the original row matrix
|
||||
Index nseg1; // Number of segments in U-column above panel row jcol
|
||||
Index nseg; // Number of segments in each U-column
|
||||
Index irep;
|
||||
Index i, k, jj;
|
||||
for (jcol = 0; jcol < n; )
|
||||
{
|
||||
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
|
||||
Index panel_size = m_perfv.panel_size; // upper bound on panel width
|
||||
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
|
||||
{
|
||||
if (relax_end(k) != emptyIdxLU)
|
||||
{
|
||||
panel_size = k - jcol;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (k == n)
|
||||
panel_size = n - jcol;
|
||||
|
||||
// Symbolic outer factorization on a panel of columns
|
||||
Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
|
||||
|
||||
// Numeric sup-panel updates in topological order
|
||||
Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu);
|
||||
|
||||
// Sparse LU within the panel, and below the panel diagonal
|
||||
for ( jj = jcol; jj< jcol + panel_size; jj++)
|
||||
{
|
||||
k = (jj - jcol) * m; // Column index for w-wide arrays
|
||||
|
||||
nseg = nseg1; // begin after all the panel segments
|
||||
//Depth-first-search for the current column
|
||||
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
|
||||
VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
|
||||
info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() ";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
// Numeric updates to this column
|
||||
VectorBlock<ScalarVector> dense_k(dense, k, m);
|
||||
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
|
||||
info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() ";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Copy the U-segments to ucol(*)
|
||||
info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() ";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Form the L-segment
|
||||
info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT ";
|
||||
std::ostringstream returnInfo;
|
||||
returnInfo << info;
|
||||
m_lastError += returnInfo.str();
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Update the determinant of the row permutation matrix
|
||||
// FIXME: the following test is not correct, we should probably take iperm_c into account and pivrow is not directly the row pivot.
|
||||
if (pivrow != jj) m_detPermR = -m_detPermR;
|
||||
|
||||
// Prune columns (0:jj-1) using column jj
|
||||
Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
|
||||
|
||||
// Reset repfnz for this column
|
||||
for (i = 0; i < nseg; i++)
|
||||
{
|
||||
irep = segrep(i);
|
||||
repfnz_k(irep) = emptyIdxLU;
|
||||
}
|
||||
} // end SparseLU within the panel
|
||||
jcol += panel_size; // Move to the next panel
|
||||
} // end for -- end elimination
|
||||
|
||||
m_detPermR = m_perm_r.determinant();
|
||||
m_detPermC = m_perm_c.determinant();
|
||||
|
||||
// Count the number of nonzeros in factors
|
||||
Base::countnz(n, m_nnzL, m_nnzU, m_glu);
|
||||
// Apply permutation to the L subscripts
|
||||
Base::fixupL(n, m_perm_r.indices(), m_glu);
|
||||
|
||||
// Create supernode matrix L
|
||||
m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup);
|
||||
// Create the column major upper sparse matrix U;
|
||||
new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, StorageIndex> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
|
||||
|
||||
m_info = Success;
|
||||
m_factorizationIsOk = true;
|
||||
}
|
||||
|
||||
template<typename MappedSupernodalType>
|
||||
struct SparseLUMatrixLReturnType : internal::no_assignment_operator
|
||||
{
|
||||
typedef typename MappedSupernodalType::Scalar Scalar;
|
||||
explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
|
||||
{ }
|
||||
Index rows() { return m_mapL.rows(); }
|
||||
Index cols() { return m_mapL.cols(); }
|
||||
template<typename Dest>
|
||||
void solveInPlace( MatrixBase<Dest> &X) const
|
||||
{
|
||||
m_mapL.solveInPlace(X);
|
||||
}
|
||||
const MappedSupernodalType& m_mapL;
|
||||
};
|
||||
|
||||
template<typename MatrixLType, typename MatrixUType>
|
||||
struct SparseLUMatrixUReturnType : internal::no_assignment_operator
|
||||
{
|
||||
typedef typename MatrixLType::Scalar Scalar;
|
||||
SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
|
||||
: m_mapL(mapL),m_mapU(mapU)
|
||||
{ }
|
||||
Index rows() { return m_mapL.rows(); }
|
||||
Index cols() { return m_mapL.cols(); }
|
||||
|
||||
template<typename Dest> void solveInPlace(MatrixBase<Dest> &X) const
|
||||
{
|
||||
Index nrhs = X.cols();
|
||||
Index n = X.rows();
|
||||
// Backward solve with U
|
||||
for (Index k = m_mapL.nsuper(); k >= 0; k--)
|
||||
{
|
||||
Index fsupc = m_mapL.supToCol()[k];
|
||||
Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension
|
||||
Index nsupc = m_mapL.supToCol()[k+1] - fsupc;
|
||||
Index luptr = m_mapL.colIndexPtr()[fsupc];
|
||||
|
||||
if (nsupc == 1)
|
||||
{
|
||||
for (Index j = 0; j < nrhs; j++)
|
||||
{
|
||||
X(fsupc, j) /= m_mapL.valuePtr()[luptr];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
|
||||
Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
|
||||
U = A.template triangularView<Upper>().solve(U);
|
||||
}
|
||||
|
||||
for (Index j = 0; j < nrhs; ++j)
|
||||
{
|
||||
for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)
|
||||
{
|
||||
typename MatrixUType::InnerIterator it(m_mapU, jcol);
|
||||
for ( ; it; ++it)
|
||||
{
|
||||
Index irow = it.index();
|
||||
X(irow, j) -= X(jcol, j) * it.value();
|
||||
}
|
||||
}
|
||||
}
|
||||
} // End For U-solve
|
||||
}
|
||||
const MatrixLType& m_mapL;
|
||||
const MatrixUType& m_mapU;
|
||||
};
|
||||
|
||||
} // End namespace Eigen
|
||||
|
||||
#endif
|
||||
66
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLUImpl.h
Normal file
66
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLUImpl.h
Normal file
@@ -0,0 +1,66 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
#ifndef SPARSELU_IMPL_H
|
||||
#define SPARSELU_IMPL_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/** \ingroup SparseLU_Module
|
||||
* \class SparseLUImpl
|
||||
* Base class for sparseLU
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
class SparseLUImpl
|
||||
{
|
||||
public:
|
||||
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
|
||||
typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> ScalarMatrix;
|
||||
typedef Map<ScalarMatrix, 0, OuterStride<> > MappedMatrixBlock;
|
||||
typedef typename ScalarVector::RealScalar RealScalar;
|
||||
typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;
|
||||
typedef Ref<Matrix<StorageIndex,Dynamic,1> > BlockIndexVector;
|
||||
typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t;
|
||||
typedef SparseMatrix<Scalar,ColMajor,StorageIndex> MatrixType;
|
||||
|
||||
protected:
|
||||
template <typename VectorType>
|
||||
Index expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions);
|
||||
Index memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu);
|
||||
template <typename VectorType>
|
||||
Index memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions);
|
||||
void heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
|
||||
void relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
|
||||
Index snode_dfs(const Index jcol, const Index kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu);
|
||||
Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu);
|
||||
Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu);
|
||||
template <typename Traits>
|
||||
void dfs_kernel(const StorageIndex jj, IndexVector& perm_r,
|
||||
Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
|
||||
Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
|
||||
IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits);
|
||||
void panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
|
||||
|
||||
void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu);
|
||||
Index column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
|
||||
Index column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu);
|
||||
Index copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu);
|
||||
void pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu);
|
||||
void countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu);
|
||||
void fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu);
|
||||
|
||||
template<typename , typename >
|
||||
friend struct column_dfs_traits;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
} // namespace Eigen
|
||||
|
||||
#endif
|
||||
226
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_Memory.h
Normal file
226
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_Memory.h
Normal file
@@ -0,0 +1,226 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 3.1) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* August 1, 2008
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_SPARSELU_MEMORY
|
||||
#define EIGEN_SPARSELU_MEMORY
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
enum { LUNoMarker = 3 };
|
||||
enum {emptyIdxLU = -1};
|
||||
inline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b)
|
||||
{
|
||||
return (std::max)(m, (t+b)*w);
|
||||
}
|
||||
|
||||
template< typename Scalar>
|
||||
inline Index LUTempSpace(Index&m, Index& w)
|
||||
{
|
||||
return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Expand the existing storage to accomodate more fill-ins
|
||||
* \param vec Valid pointer to the vector to allocate or expand
|
||||
* \param[in,out] length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector
|
||||
* \param[in] nbElts Current number of elements in the factors
|
||||
* \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand
|
||||
* \param[in,out] num_expansions Number of times the memory has been expanded
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
template <typename VectorType>
|
||||
Index SparseLUImpl<Scalar,StorageIndex>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions)
|
||||
{
|
||||
|
||||
float alpha = 1.5; // Ratio of the memory increase
|
||||
Index new_len; // New size of the allocated memory
|
||||
|
||||
if(num_expansions == 0 || keep_prev)
|
||||
new_len = length ; // First time allocate requested
|
||||
else
|
||||
new_len = (std::max)(length+1,Index(alpha * length));
|
||||
|
||||
VectorType old_vec; // Temporary vector to hold the previous values
|
||||
if (nbElts > 0 )
|
||||
old_vec = vec.segment(0,nbElts);
|
||||
|
||||
//Allocate or expand the current vector
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
try
|
||||
#endif
|
||||
{
|
||||
vec.resize(new_len);
|
||||
}
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
catch(std::bad_alloc& )
|
||||
#else
|
||||
if(!vec.size())
|
||||
#endif
|
||||
{
|
||||
if (!num_expansions)
|
||||
{
|
||||
// First time to allocate from LUMemInit()
|
||||
// Let LUMemInit() deals with it.
|
||||
return -1;
|
||||
}
|
||||
if (keep_prev)
|
||||
{
|
||||
// In this case, the memory length should not not be reduced
|
||||
return new_len;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Reduce the size and increase again
|
||||
Index tries = 0; // Number of attempts
|
||||
do
|
||||
{
|
||||
alpha = (alpha + 1)/2;
|
||||
new_len = (std::max)(length+1,Index(alpha * length));
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
try
|
||||
#endif
|
||||
{
|
||||
vec.resize(new_len);
|
||||
}
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
catch(std::bad_alloc& )
|
||||
#else
|
||||
if (!vec.size())
|
||||
#endif
|
||||
{
|
||||
tries += 1;
|
||||
if ( tries > 10) return new_len;
|
||||
}
|
||||
} while (!vec.size());
|
||||
}
|
||||
}
|
||||
//Copy the previous values to the newly allocated space
|
||||
if (nbElts > 0)
|
||||
vec.segment(0, nbElts) = old_vec;
|
||||
|
||||
|
||||
length = new_len;
|
||||
if(num_expansions) ++num_expansions;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Allocate various working space for the numerical factorization phase.
|
||||
* \param m number of rows of the input matrix
|
||||
* \param n number of columns
|
||||
* \param annz number of initial nonzeros in the matrix
|
||||
* \param lwork if lwork=-1, this routine returns an estimated size of the required memory
|
||||
* \param glu persistent data to facilitate multiple factors : will be deleted later ??
|
||||
* \param fillratio estimated ratio of fill in the factors
|
||||
* \param panel_size Size of a panel
|
||||
* \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success
|
||||
* \note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
Index SparseLUImpl<Scalar,StorageIndex>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu)
|
||||
{
|
||||
Index& num_expansions = glu.num_expansions; //No memory expansions so far
|
||||
num_expansions = 0;
|
||||
glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U
|
||||
glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated nnz in L factor
|
||||
// Return the estimated size to the user if necessary
|
||||
Index tempSpace;
|
||||
tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
|
||||
if (lwork == emptyIdxLU)
|
||||
{
|
||||
Index estimated_size;
|
||||
estimated_size = (5 * n + 5) * sizeof(Index) + tempSpace
|
||||
+ (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n;
|
||||
return estimated_size;
|
||||
}
|
||||
|
||||
// Setup the required space
|
||||
|
||||
// First allocate Integer pointers for L\U factors
|
||||
glu.xsup.resize(n+1);
|
||||
glu.supno.resize(n+1);
|
||||
glu.xlsub.resize(n+1);
|
||||
glu.xlusup.resize(n+1);
|
||||
glu.xusub.resize(n+1);
|
||||
|
||||
// Reserve memory for L/U factors
|
||||
do
|
||||
{
|
||||
if( (expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions)<0)
|
||||
|| (expand<ScalarVector>(glu.ucol, glu.nzumax, 0, 0, num_expansions)<0)
|
||||
|| (expand<IndexVector> (glu.lsub, glu.nzlmax, 0, 0, num_expansions)<0)
|
||||
|| (expand<IndexVector> (glu.usub, glu.nzumax, 0, 1, num_expansions)<0) )
|
||||
{
|
||||
//Reduce the estimated size and retry
|
||||
glu.nzlumax /= 2;
|
||||
glu.nzumax /= 2;
|
||||
glu.nzlmax /= 2;
|
||||
if (glu.nzlumax < annz ) return glu.nzlumax;
|
||||
}
|
||||
} while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size());
|
||||
|
||||
++num_expansions;
|
||||
return 0;
|
||||
|
||||
} // end LuMemInit
|
||||
|
||||
/**
|
||||
* \brief Expand the existing storage
|
||||
* \param vec vector to expand
|
||||
* \param[in,out] maxlen On input, previous size of vec (Number of elements to copy ). on output, new size
|
||||
* \param nbElts current number of elements in the vector.
|
||||
* \param memtype Type of the element to expand
|
||||
* \param num_expansions Number of expansions
|
||||
* \return 0 on success, > 0 size of the memory allocated so far
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
template <typename VectorType>
|
||||
Index SparseLUImpl<Scalar,StorageIndex>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions)
|
||||
{
|
||||
Index failed_size;
|
||||
if (memtype == USUB)
|
||||
failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);
|
||||
else
|
||||
failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);
|
||||
|
||||
if (failed_size)
|
||||
return failed_size;
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
#endif // EIGEN_SPARSELU_MEMORY
|
||||
110
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_Structs.h
Normal file
110
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_Structs.h
Normal file
@@ -0,0 +1,110 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
* NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h
|
||||
* -- SuperLU routine (version 4.1) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* November, 2010
|
||||
*
|
||||
* Global data structures used in LU factorization -
|
||||
*
|
||||
* nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
|
||||
* (xsup,supno): supno[i] is the supernode no to which i belongs;
|
||||
* xsup(s) points to the beginning of the s-th supernode.
|
||||
* e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
|
||||
* xsup 0 1 2 4 7 12
|
||||
* Note: dfs will be performed on supernode rep. relative to the new
|
||||
* row pivoting ordering
|
||||
*
|
||||
* (xlsub,lsub): lsub[*] contains the compressed subscript of
|
||||
* rectangular supernodes; xlsub[j] points to the starting
|
||||
* location of the j-th column in lsub[*]. Note that xlsub
|
||||
* is indexed by column.
|
||||
* Storage: original row subscripts
|
||||
*
|
||||
* During the course of sparse LU factorization, we also use
|
||||
* (xlsub,lsub) for the purpose of symmetric pruning. For each
|
||||
* supernode {s,s+1,...,t=s+r} with first column s and last
|
||||
* column t, the subscript set
|
||||
* lsub[j], j=xlsub[s], .., xlsub[s+1]-1
|
||||
* is the structure of column s (i.e. structure of this supernode).
|
||||
* It is used for the storage of numerical values.
|
||||
* Furthermore,
|
||||
* lsub[j], j=xlsub[t], .., xlsub[t+1]-1
|
||||
* is the structure of the last column t of this supernode.
|
||||
* It is for the purpose of symmetric pruning. Therefore, the
|
||||
* structural subscripts can be rearranged without making physical
|
||||
* interchanges among the numerical values.
|
||||
*
|
||||
* However, if the supernode has only one column, then we
|
||||
* only keep one set of subscripts. For any subscript interchange
|
||||
* performed, similar interchange must be done on the numerical
|
||||
* values.
|
||||
*
|
||||
* The last column structures (for pruning) will be removed
|
||||
* after the numercial LU factorization phase.
|
||||
*
|
||||
* (xlusup,lusup): lusup[*] contains the numerical values of the
|
||||
* rectangular supernodes; xlusup[j] points to the starting
|
||||
* location of the j-th column in storage vector lusup[*]
|
||||
* Note: xlusup is indexed by column.
|
||||
* Each rectangular supernode is stored by column-major
|
||||
* scheme, consistent with Fortran 2-dim array storage.
|
||||
*
|
||||
* (xusub,ucol,usub): ucol[*] stores the numerical values of
|
||||
* U-columns outside the rectangular supernodes. The row
|
||||
* subscript of nonzero ucol[k] is stored in usub[k].
|
||||
* xusub[i] points to the starting location of column i in ucol.
|
||||
* Storage: new row subscripts; that is subscripts of PA.
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_LU_STRUCTS
|
||||
#define EIGEN_LU_STRUCTS
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType;
|
||||
|
||||
template <typename IndexVector, typename ScalarVector>
|
||||
struct LU_GlobalLU_t {
|
||||
typedef typename IndexVector::Scalar StorageIndex;
|
||||
IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode
|
||||
IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping)
|
||||
ScalarVector lusup; // nonzero values of L ordered by columns
|
||||
IndexVector lsub; // Compressed row indices of L rectangular supernodes.
|
||||
IndexVector xlusup; // pointers to the beginning of each column in lusup
|
||||
IndexVector xlsub; // pointers to the beginning of each column in lsub
|
||||
Index nzlmax; // Current max size of lsub
|
||||
Index nzlumax; // Current max size of lusup
|
||||
ScalarVector ucol; // nonzero values of U ordered by columns
|
||||
IndexVector usub; // row indices of U columns in ucol
|
||||
IndexVector xusub; // Pointers to the beginning of each column of U in ucol
|
||||
Index nzumax; // Current max size of ucol
|
||||
Index n; // Number of columns in the matrix
|
||||
Index num_expansions;
|
||||
};
|
||||
|
||||
// Values to set for performance
|
||||
struct perfvalues {
|
||||
Index panel_size; // a panel consists of at most <panel_size> consecutive columns
|
||||
Index relax; // To control degree of relaxing supernodes. If the number of nodes (columns)
|
||||
// in a subtree of the elimination tree is less than relax, this subtree is considered
|
||||
// as one supernode regardless of the row structures of those columns
|
||||
Index maxsuper; // The maximum size for a supernode in complete LU
|
||||
Index rowblk; // The minimum row dimension for 2-D blocking to be used;
|
||||
Index colblk; // The minimum column dimension for 2-D blocking to be used;
|
||||
Index fillfactor; // The estimated fills factors for L and U, compared with A
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
#endif // EIGEN_LU_STRUCTS
|
||||
@@ -0,0 +1,301 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
|
||||
#define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/** \ingroup SparseLU_Module
|
||||
* \brief a class to manipulate the L supernodal factor from the SparseLU factorization
|
||||
*
|
||||
* This class contain the data to easily store
|
||||
* and manipulate the supernodes during the factorization and solution phase of Sparse LU.
|
||||
* Only the lower triangular matrix has supernodes.
|
||||
*
|
||||
* NOTE : This class corresponds to the SCformat structure in SuperLU
|
||||
*
|
||||
*/
|
||||
/* TODO
|
||||
* InnerIterator as for sparsematrix
|
||||
* SuperInnerIterator to iterate through all supernodes
|
||||
* Function for triangular solve
|
||||
*/
|
||||
template <typename _Scalar, typename _StorageIndex>
|
||||
class MappedSuperNodalMatrix
|
||||
{
|
||||
public:
|
||||
typedef _Scalar Scalar;
|
||||
typedef _StorageIndex StorageIndex;
|
||||
typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
|
||||
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
|
||||
public:
|
||||
MappedSuperNodalMatrix()
|
||||
{
|
||||
|
||||
}
|
||||
MappedSuperNodalMatrix(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
|
||||
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
|
||||
{
|
||||
setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col);
|
||||
}
|
||||
|
||||
~MappedSuperNodalMatrix()
|
||||
{
|
||||
|
||||
}
|
||||
/**
|
||||
* Set appropriate pointers for the lower triangular supernodal matrix
|
||||
* These infos are available at the end of the numerical factorization
|
||||
* FIXME This class will be modified such that it can be use in the course
|
||||
* of the factorization.
|
||||
*/
|
||||
void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
|
||||
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
|
||||
{
|
||||
m_row = m;
|
||||
m_col = n;
|
||||
m_nzval = nzval.data();
|
||||
m_nzval_colptr = nzval_colptr.data();
|
||||
m_rowind = rowind.data();
|
||||
m_rowind_colptr = rowind_colptr.data();
|
||||
m_nsuper = col_to_sup(n);
|
||||
m_col_to_sup = col_to_sup.data();
|
||||
m_sup_to_col = sup_to_col.data();
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of rows
|
||||
*/
|
||||
Index rows() { return m_row; }
|
||||
|
||||
/**
|
||||
* Number of columns
|
||||
*/
|
||||
Index cols() { return m_col; }
|
||||
|
||||
/**
|
||||
* Return the array of nonzero values packed by column
|
||||
*
|
||||
* The size is nnz
|
||||
*/
|
||||
Scalar* valuePtr() { return m_nzval; }
|
||||
|
||||
const Scalar* valuePtr() const
|
||||
{
|
||||
return m_nzval;
|
||||
}
|
||||
/**
|
||||
* Return the pointers to the beginning of each column in \ref valuePtr()
|
||||
*/
|
||||
StorageIndex* colIndexPtr()
|
||||
{
|
||||
return m_nzval_colptr;
|
||||
}
|
||||
|
||||
const StorageIndex* colIndexPtr() const
|
||||
{
|
||||
return m_nzval_colptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the array of compressed row indices of all supernodes
|
||||
*/
|
||||
StorageIndex* rowIndex() { return m_rowind; }
|
||||
|
||||
const StorageIndex* rowIndex() const
|
||||
{
|
||||
return m_rowind;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the location in \em rowvaluePtr() which starts each column
|
||||
*/
|
||||
StorageIndex* rowIndexPtr() { return m_rowind_colptr; }
|
||||
|
||||
const StorageIndex* rowIndexPtr() const
|
||||
{
|
||||
return m_rowind_colptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the array of column-to-supernode mapping
|
||||
*/
|
||||
StorageIndex* colToSup() { return m_col_to_sup; }
|
||||
|
||||
const StorageIndex* colToSup() const
|
||||
{
|
||||
return m_col_to_sup;
|
||||
}
|
||||
/**
|
||||
* Return the array of supernode-to-column mapping
|
||||
*/
|
||||
StorageIndex* supToCol() { return m_sup_to_col; }
|
||||
|
||||
const StorageIndex* supToCol() const
|
||||
{
|
||||
return m_sup_to_col;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of supernodes
|
||||
*/
|
||||
Index nsuper() const
|
||||
{
|
||||
return m_nsuper;
|
||||
}
|
||||
|
||||
class InnerIterator;
|
||||
template<typename Dest>
|
||||
void solveInPlace( MatrixBase<Dest>&X) const;
|
||||
|
||||
|
||||
|
||||
|
||||
protected:
|
||||
Index m_row; // Number of rows
|
||||
Index m_col; // Number of columns
|
||||
Index m_nsuper; // Number of supernodes
|
||||
Scalar* m_nzval; //array of nonzero values packed by column
|
||||
StorageIndex* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j
|
||||
StorageIndex* m_rowind; // Array of compressed row indices of rectangular supernodes
|
||||
StorageIndex* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j
|
||||
StorageIndex* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs
|
||||
StorageIndex* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode
|
||||
|
||||
private :
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L
|
||||
*
|
||||
*/
|
||||
template<typename Scalar, typename StorageIndex>
|
||||
class MappedSuperNodalMatrix<Scalar,StorageIndex>::InnerIterator
|
||||
{
|
||||
public:
|
||||
InnerIterator(const MappedSuperNodalMatrix& mat, Index outer)
|
||||
: m_matrix(mat),
|
||||
m_outer(outer),
|
||||
m_supno(mat.colToSup()[outer]),
|
||||
m_idval(mat.colIndexPtr()[outer]),
|
||||
m_startidval(m_idval),
|
||||
m_endidval(mat.colIndexPtr()[outer+1]),
|
||||
m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]),
|
||||
m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1])
|
||||
{}
|
||||
inline InnerIterator& operator++()
|
||||
{
|
||||
m_idval++;
|
||||
m_idrow++;
|
||||
return *this;
|
||||
}
|
||||
inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; }
|
||||
|
||||
inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]); }
|
||||
|
||||
inline Index index() const { return m_matrix.rowIndex()[m_idrow]; }
|
||||
inline Index row() const { return index(); }
|
||||
inline Index col() const { return m_outer; }
|
||||
|
||||
inline Index supIndex() const { return m_supno; }
|
||||
|
||||
inline operator bool() const
|
||||
{
|
||||
return ( (m_idval < m_endidval) && (m_idval >= m_startidval)
|
||||
&& (m_idrow < m_endidrow) );
|
||||
}
|
||||
|
||||
protected:
|
||||
const MappedSuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix
|
||||
const Index m_outer; // Current column
|
||||
const Index m_supno; // Current SuperNode number
|
||||
Index m_idval; // Index to browse the values in the current column
|
||||
const Index m_startidval; // Start of the column value
|
||||
const Index m_endidval; // End of the column value
|
||||
Index m_idrow; // Index to browse the row indices
|
||||
Index m_endidrow; // End index of row indices of the current column
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Solve with the supernode triangular matrix
|
||||
*
|
||||
*/
|
||||
template<typename Scalar, typename Index_>
|
||||
template<typename Dest>
|
||||
void MappedSuperNodalMatrix<Scalar,Index_>::solveInPlace( MatrixBase<Dest>&X) const
|
||||
{
|
||||
/* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than Index */
|
||||
// eigen_assert(X.rows() <= NumTraits<Index>::highest());
|
||||
// eigen_assert(X.cols() <= NumTraits<Index>::highest());
|
||||
Index n = int(X.rows());
|
||||
Index nrhs = Index(X.cols());
|
||||
const Scalar * Lval = valuePtr(); // Nonzero values
|
||||
Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor> work(n, nrhs); // working vector
|
||||
work.setZero();
|
||||
for (Index k = 0; k <= nsuper(); k ++)
|
||||
{
|
||||
Index fsupc = supToCol()[k]; // First column of the current supernode
|
||||
Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column
|
||||
Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode
|
||||
Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode
|
||||
Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode
|
||||
Index irow; //Current index row
|
||||
|
||||
if (nsupc == 1 )
|
||||
{
|
||||
for (Index j = 0; j < nrhs; j++)
|
||||
{
|
||||
InnerIterator it(*this, fsupc);
|
||||
++it; // Skip the diagonal element
|
||||
for (; it; ++it)
|
||||
{
|
||||
irow = it.row();
|
||||
X(irow, j) -= X(fsupc, j) * it.value();
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// The supernode has more than one column
|
||||
Index luptr = colIndexPtr()[fsupc];
|
||||
Index lda = colIndexPtr()[fsupc+1] - luptr;
|
||||
|
||||
// Triangular solve
|
||||
Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) );
|
||||
Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
|
||||
U = A.template triangularView<UnitLower>().solve(U);
|
||||
|
||||
// Matrix-vector product
|
||||
new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
|
||||
work.topRows(nrow).noalias() = A * U;
|
||||
|
||||
//Begin Scatter
|
||||
for (Index j = 0; j < nrhs; j++)
|
||||
{
|
||||
Index iptr = istart + nsupc;
|
||||
for (Index i = 0; i < nrow; i++)
|
||||
{
|
||||
irow = rowIndex()[iptr];
|
||||
X(irow, j) -= work(i, j); // Scatter operation
|
||||
work(i, j) = Scalar(0);
|
||||
iptr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SPARSELU_MATRIX_H
|
||||
80
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_Utils.h
Normal file
80
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_Utils.h
Normal file
@@ -0,0 +1,80 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
|
||||
#ifndef EIGEN_SPARSELU_UTILS_H
|
||||
#define EIGEN_SPARSELU_UTILS_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/**
|
||||
* \brief Count Nonzero elements in the factors
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
void SparseLUImpl<Scalar,StorageIndex>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu)
|
||||
{
|
||||
nnzL = 0;
|
||||
nnzU = (glu.xusub)(n);
|
||||
Index nsuper = (glu.supno)(n);
|
||||
Index jlen;
|
||||
Index i, j, fsupc;
|
||||
if (n <= 0 ) return;
|
||||
// For each supernode
|
||||
for (i = 0; i <= nsuper; i++)
|
||||
{
|
||||
fsupc = glu.xsup(i);
|
||||
jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
|
||||
|
||||
for (j = fsupc; j < glu.xsup(i+1); j++)
|
||||
{
|
||||
nnzL += jlen;
|
||||
nnzU += j - fsupc + 1;
|
||||
jlen--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Fix up the data storage lsub for L-subscripts.
|
||||
*
|
||||
* It removes the subscripts sets for structural pruning,
|
||||
* and applies permutation to the remaining subscripts
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
void SparseLUImpl<Scalar,StorageIndex>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu)
|
||||
{
|
||||
Index fsupc, i, j, k, jstart;
|
||||
|
||||
StorageIndex nextl = 0;
|
||||
Index nsuper = (glu.supno)(n);
|
||||
|
||||
// For each supernode
|
||||
for (i = 0; i <= nsuper; i++)
|
||||
{
|
||||
fsupc = glu.xsup(i);
|
||||
jstart = glu.xlsub(fsupc);
|
||||
glu.xlsub(fsupc) = nextl;
|
||||
for (j = jstart; j < glu.xlsub(fsupc + 1); j++)
|
||||
{
|
||||
glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A
|
||||
nextl++;
|
||||
}
|
||||
for (k = fsupc+1; k < glu.xsup(i+1); k++)
|
||||
glu.xlsub(k) = nextl; // other columns in supernode i
|
||||
}
|
||||
|
||||
glu.xlsub(n) = nextl;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
#endif // EIGEN_SPARSELU_UTILS_H
|
||||
@@ -0,0 +1,181 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 3.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* October 15, 2003
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_COLUMN_BMOD_H
|
||||
#define SPARSELU_COLUMN_BMOD_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
/**
|
||||
* \brief Performs numeric block updates (sup-col) in topological order
|
||||
*
|
||||
* \param jcol current column to update
|
||||
* \param nseg Number of segments in the U part
|
||||
* \param dense Store the full representation of the column
|
||||
* \param tempv working array
|
||||
* \param segrep segment representative ...
|
||||
* \param repfnz ??? First nonzero column in each row ??? ...
|
||||
* \param fpanelc First column in the current panel
|
||||
* \param glu Global LU data.
|
||||
* \return 0 - successful return
|
||||
* > 0 - number of bytes allocated when run out of space
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
Index SparseLUImpl<Scalar,StorageIndex>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv,
|
||||
BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu)
|
||||
{
|
||||
Index jsupno, k, ksub, krep, ksupno;
|
||||
Index lptr, nrow, isub, irow, nextlu, new_next, ufirst;
|
||||
Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros;
|
||||
/* krep = representative of current k-th supernode
|
||||
* fsupc = first supernodal column
|
||||
* nsupc = number of columns in a supernode
|
||||
* nsupr = number of rows in a supernode
|
||||
* luptr = location of supernodal LU-block in storage
|
||||
* kfnz = first nonz in the k-th supernodal segment
|
||||
* no_zeros = no lf leading zeros in a supernodal U-segment
|
||||
*/
|
||||
|
||||
jsupno = glu.supno(jcol);
|
||||
// For each nonzero supernode segment of U[*,j] in topological order
|
||||
k = nseg - 1;
|
||||
Index d_fsupc; // distance between the first column of the current panel and the
|
||||
// first column of the current snode
|
||||
Index fst_col; // First column within small LU update
|
||||
Index segsize;
|
||||
for (ksub = 0; ksub < nseg; ksub++)
|
||||
{
|
||||
krep = segrep(k); k--;
|
||||
ksupno = glu.supno(krep);
|
||||
if (jsupno != ksupno )
|
||||
{
|
||||
// outside the rectangular supernode
|
||||
fsupc = glu.xsup(ksupno);
|
||||
fst_col = (std::max)(fsupc, fpanelc);
|
||||
|
||||
// Distance from the current supernode to the current panel;
|
||||
// d_fsupc = 0 if fsupc > fpanelc
|
||||
d_fsupc = fst_col - fsupc;
|
||||
|
||||
luptr = glu.xlusup(fst_col) + d_fsupc;
|
||||
lptr = glu.xlsub(fsupc) + d_fsupc;
|
||||
|
||||
kfnz = repfnz(krep);
|
||||
kfnz = (std::max)(kfnz, fpanelc);
|
||||
|
||||
segsize = krep - kfnz + 1;
|
||||
nsupc = krep - fst_col + 1;
|
||||
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
|
||||
nrow = nsupr - d_fsupc - nsupc;
|
||||
Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);
|
||||
|
||||
|
||||
// Perform a triangular solver and block update,
|
||||
// then scatter the result of sup-col update to dense
|
||||
no_zeros = kfnz - fst_col;
|
||||
if(segsize==1)
|
||||
LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
|
||||
else
|
||||
LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
|
||||
} // end if jsupno
|
||||
} // end for each segment
|
||||
|
||||
// Process the supernodal portion of L\U[*,j]
|
||||
nextlu = glu.xlusup(jcol);
|
||||
fsupc = glu.xsup(jsupno);
|
||||
|
||||
// copy the SPA dense into L\U[*,j]
|
||||
Index mem;
|
||||
new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc);
|
||||
Index offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;
|
||||
if(offset)
|
||||
new_next += offset;
|
||||
while (new_next > glu.nzlumax )
|
||||
{
|
||||
mem = memXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
|
||||
if (mem) return mem;
|
||||
}
|
||||
|
||||
for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
|
||||
{
|
||||
irow = glu.lsub(isub);
|
||||
glu.lusup(nextlu) = dense(irow);
|
||||
dense(irow) = Scalar(0.0);
|
||||
++nextlu;
|
||||
}
|
||||
|
||||
if(offset)
|
||||
{
|
||||
glu.lusup.segment(nextlu,offset).setZero();
|
||||
nextlu += offset;
|
||||
}
|
||||
glu.xlusup(jcol + 1) = StorageIndex(nextlu); // close L\U(*,jcol);
|
||||
|
||||
/* For more updates within the panel (also within the current supernode),
|
||||
* should start from the first column of the panel, or the first column
|
||||
* of the supernode, whichever is bigger. There are two cases:
|
||||
* 1) fsupc < fpanelc, then fst_col <-- fpanelc
|
||||
* 2) fsupc >= fpanelc, then fst_col <-- fsupc
|
||||
*/
|
||||
fst_col = (std::max)(fsupc, fpanelc);
|
||||
|
||||
if (fst_col < jcol)
|
||||
{
|
||||
// Distance between the current supernode and the current panel
|
||||
// d_fsupc = 0 if fsupc >= fpanelc
|
||||
d_fsupc = fst_col - fsupc;
|
||||
|
||||
lptr = glu.xlsub(fsupc) + d_fsupc;
|
||||
luptr = glu.xlusup(fst_col) + d_fsupc;
|
||||
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension
|
||||
nsupc = jcol - fst_col; // excluding jcol
|
||||
nrow = nsupr - d_fsupc - nsupc;
|
||||
|
||||
// points to the beginning of jcol in snode L\U(jsupno)
|
||||
ufirst = glu.xlusup(jcol) + d_fsupc;
|
||||
Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
|
||||
MappedMatrixBlock A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
|
||||
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
|
||||
u = A.template triangularView<UnitLower>().solve(u);
|
||||
|
||||
new (&A) MappedMatrixBlock ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
|
||||
VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
|
||||
l.noalias() -= A * u;
|
||||
|
||||
} // End if fst_col
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // SPARSELU_COLUMN_BMOD_H
|
||||
179
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_column_dfs.h
Normal file
179
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_column_dfs.h
Normal file
@@ -0,0 +1,179 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 2.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* November 15, 1997
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_COLUMN_DFS_H
|
||||
#define SPARSELU_COLUMN_DFS_H
|
||||
|
||||
template <typename Scalar, typename StorageIndex> class SparseLUImpl;
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename IndexVector, typename ScalarVector>
|
||||
struct column_dfs_traits : no_assignment_operator
|
||||
{
|
||||
typedef typename ScalarVector::Scalar Scalar;
|
||||
typedef typename IndexVector::Scalar StorageIndex;
|
||||
column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& glu, SparseLUImpl<Scalar, StorageIndex>& luImpl)
|
||||
: m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl)
|
||||
{}
|
||||
bool update_segrep(Index /*krep*/, Index /*jj*/)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
void mem_expand(IndexVector& lsub, Index& nextl, Index chmark)
|
||||
{
|
||||
if (nextl >= m_glu.nzlmax)
|
||||
m_luImpl.memXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
|
||||
if (chmark != (m_jcol-1)) m_jsuper_ref = emptyIdxLU;
|
||||
}
|
||||
enum { ExpandMem = true };
|
||||
|
||||
Index m_jcol;
|
||||
Index& m_jsuper_ref;
|
||||
typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& m_glu;
|
||||
SparseLUImpl<Scalar, StorageIndex>& m_luImpl;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Performs a symbolic factorization on column jcol and decide the supernode boundary
|
||||
*
|
||||
* A supernode representative is the last column of a supernode.
|
||||
* The nonzeros in U[*,j] are segments that end at supernodes representatives.
|
||||
* The routine returns a list of the supernodal representatives
|
||||
* in topological order of the dfs that generates them.
|
||||
* The location of the first nonzero in each supernodal segment
|
||||
* (supernodal entry location) is also returned.
|
||||
*
|
||||
* \param m number of rows in the matrix
|
||||
* \param jcol Current column
|
||||
* \param perm_r Row permutation
|
||||
* \param maxsuper Maximum number of column allowed in a supernode
|
||||
* \param [in,out] nseg Number of segments in current U[*,j] - new segments appended
|
||||
* \param lsub_col defines the rhs vector to start the dfs
|
||||
* \param [in,out] segrep Segment representatives - new segments appended
|
||||
* \param repfnz First nonzero location in each row
|
||||
* \param xprune
|
||||
* \param marker marker[i] == jj, if i was visited during dfs of current column jj;
|
||||
* \param parent
|
||||
* \param xplore working array
|
||||
* \param glu global LU data
|
||||
* \return 0 success
|
||||
* > 0 number of bytes allocated when run out of space
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
Index SparseLUImpl<Scalar,StorageIndex>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg,
|
||||
BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune,
|
||||
IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
|
||||
{
|
||||
|
||||
Index jsuper = glu.supno(jcol);
|
||||
Index nextl = glu.xlsub(jcol);
|
||||
VectorBlock<IndexVector> marker2(marker, 2*m, m);
|
||||
|
||||
|
||||
column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu, *this);
|
||||
|
||||
// For each nonzero in A(*,jcol) do dfs
|
||||
for (Index k = 0; ((k < m) ? lsub_col[k] != emptyIdxLU : false) ; k++)
|
||||
{
|
||||
Index krow = lsub_col(k);
|
||||
lsub_col(k) = emptyIdxLU;
|
||||
Index kmark = marker2(krow);
|
||||
|
||||
// krow was visited before, go to the next nonz;
|
||||
if (kmark == jcol) continue;
|
||||
|
||||
dfs_kernel(StorageIndex(jcol), perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
|
||||
xplore, glu, nextl, krow, traits);
|
||||
} // for each nonzero ...
|
||||
|
||||
Index fsupc;
|
||||
StorageIndex nsuper = glu.supno(jcol);
|
||||
StorageIndex jcolp1 = StorageIndex(jcol) + 1;
|
||||
Index jcolm1 = jcol - 1;
|
||||
|
||||
// check to see if j belongs in the same supernode as j-1
|
||||
if ( jcol == 0 )
|
||||
{ // Do nothing for column 0
|
||||
nsuper = glu.supno(0) = 0 ;
|
||||
}
|
||||
else
|
||||
{
|
||||
fsupc = glu.xsup(nsuper);
|
||||
StorageIndex jptr = glu.xlsub(jcol); // Not yet compressed
|
||||
StorageIndex jm1ptr = glu.xlsub(jcolm1);
|
||||
|
||||
// Use supernodes of type T2 : see SuperLU paper
|
||||
if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU;
|
||||
|
||||
// Make sure the number of columns in a supernode doesn't
|
||||
// exceed threshold
|
||||
if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU;
|
||||
|
||||
/* If jcol starts a new supernode, reclaim storage space in
|
||||
* glu.lsub from previous supernode. Note we only store
|
||||
* the subscript set of the first and last columns of
|
||||
* a supernode. (first for num values, last for pruning)
|
||||
*/
|
||||
if (jsuper == emptyIdxLU)
|
||||
{ // starts a new supernode
|
||||
if ( (fsupc < jcolm1-1) )
|
||||
{ // >= 3 columns in nsuper
|
||||
StorageIndex ito = glu.xlsub(fsupc+1);
|
||||
glu.xlsub(jcolm1) = ito;
|
||||
StorageIndex istop = ito + jptr - jm1ptr;
|
||||
xprune(jcolm1) = istop; // intialize xprune(jcol-1)
|
||||
glu.xlsub(jcol) = istop;
|
||||
|
||||
for (StorageIndex ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
|
||||
glu.lsub(ito) = glu.lsub(ifrom);
|
||||
nextl = ito; // = istop + length(jcol)
|
||||
}
|
||||
nsuper++;
|
||||
glu.supno(jcol) = nsuper;
|
||||
} // if a new supernode
|
||||
} // end else: jcol > 0
|
||||
|
||||
// Tidy up the pointers before exit
|
||||
glu.xsup(nsuper+1) = jcolp1;
|
||||
glu.supno(jcolp1) = nsuper;
|
||||
xprune(jcol) = StorageIndex(nextl); // Intialize upper bound for pruning
|
||||
glu.xlsub(jcolp1) = StorageIndex(nextl);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,107 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 2.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* November 15, 1997
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_COPY_TO_UCOL_H
|
||||
#define SPARSELU_COPY_TO_UCOL_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/**
|
||||
* \brief Performs numeric block updates (sup-col) in topological order
|
||||
*
|
||||
* \param jcol current column to update
|
||||
* \param nseg Number of segments in the U part
|
||||
* \param segrep segment representative ...
|
||||
* \param repfnz First nonzero column in each row ...
|
||||
* \param perm_r Row permutation
|
||||
* \param dense Store the full representation of the column
|
||||
* \param glu Global LU data.
|
||||
* \return 0 - successful return
|
||||
* > 0 - number of bytes allocated when run out of space
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
Index SparseLUImpl<Scalar,StorageIndex>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep,
|
||||
BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
|
||||
{
|
||||
Index ksub, krep, ksupno;
|
||||
|
||||
Index jsupno = glu.supno(jcol);
|
||||
|
||||
// For each nonzero supernode segment of U[*,j] in topological order
|
||||
Index k = nseg - 1, i;
|
||||
StorageIndex nextu = glu.xusub(jcol);
|
||||
Index kfnz, isub, segsize;
|
||||
Index new_next,irow;
|
||||
Index fsupc, mem;
|
||||
for (ksub = 0; ksub < nseg; ksub++)
|
||||
{
|
||||
krep = segrep(k); k--;
|
||||
ksupno = glu.supno(krep);
|
||||
if (jsupno != ksupno ) // should go into ucol();
|
||||
{
|
||||
kfnz = repfnz(krep);
|
||||
if (kfnz != emptyIdxLU)
|
||||
{ // Nonzero U-segment
|
||||
fsupc = glu.xsup(ksupno);
|
||||
isub = glu.xlsub(fsupc) + kfnz - fsupc;
|
||||
segsize = krep - kfnz + 1;
|
||||
new_next = nextu + segsize;
|
||||
while (new_next > glu.nzumax)
|
||||
{
|
||||
mem = memXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
|
||||
if (mem) return mem;
|
||||
mem = memXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
|
||||
if (mem) return mem;
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < segsize; i++)
|
||||
{
|
||||
irow = glu.lsub(isub);
|
||||
glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order
|
||||
glu.ucol(nextu) = dense(irow);
|
||||
dense(irow) = Scalar(0.0);
|
||||
nextu++;
|
||||
isub++;
|
||||
}
|
||||
|
||||
} // end nonzero U-segment
|
||||
|
||||
} // end if jsupno
|
||||
|
||||
} // end for each segment
|
||||
glu.xusub(jcol + 1) = nextu; // close U(*,jcol)
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // SPARSELU_COPY_TO_UCOL_H
|
||||
@@ -0,0 +1,280 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H
|
||||
#define EIGEN_SPARSELU_GEMM_KERNEL_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
/** \internal
|
||||
* A general matrix-matrix product kernel optimized for the SparseLU factorization.
|
||||
* - A, B, and C must be column major
|
||||
* - lda and ldc must be multiples of the respective packet size
|
||||
* - C must have the same alignment as A
|
||||
*/
|
||||
template<typename Scalar>
|
||||
EIGEN_DONT_INLINE
|
||||
void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
|
||||
{
|
||||
using namespace Eigen::internal;
|
||||
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
enum {
|
||||
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
|
||||
PacketSize = packet_traits<Scalar>::size,
|
||||
PM = 8, // peeling in M
|
||||
RN = 2, // register blocking
|
||||
RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking
|
||||
BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk
|
||||
SM = PM*PacketSize // step along M
|
||||
};
|
||||
Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
|
||||
Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
|
||||
Index i0 = internal::first_default_aligned(A,m);
|
||||
|
||||
eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));
|
||||
|
||||
// handle the non aligned rows of A and C without any optimization:
|
||||
for(Index i=0; i<i0; ++i)
|
||||
{
|
||||
for(Index j=0; j<n; ++j)
|
||||
{
|
||||
Scalar c = C[i+j*ldc];
|
||||
for(Index k=0; k<d; ++k)
|
||||
c += B[k+j*ldb] * A[i+k*lda];
|
||||
C[i+j*ldc] = c;
|
||||
}
|
||||
}
|
||||
// process the remaining rows per chunk of BM rows
|
||||
for(Index ib=i0; ib<m; ib+=BM)
|
||||
{
|
||||
Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows
|
||||
Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
|
||||
Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
|
||||
|
||||
// Let's process two columns of B-C at once
|
||||
for(Index j=0; j<n_end; j+=RN)
|
||||
{
|
||||
const Scalar* Bc0 = B+(j+0)*ldb;
|
||||
const Scalar* Bc1 = B+(j+1)*ldb;
|
||||
|
||||
for(Index k=0; k<d_end; k+=RK)
|
||||
{
|
||||
|
||||
// load and expand a RN x RK block of B
|
||||
Packet b00, b10, b20, b30, b01, b11, b21, b31;
|
||||
{ b00 = pset1<Packet>(Bc0[0]); }
|
||||
{ b10 = pset1<Packet>(Bc0[1]); }
|
||||
if(RK==4) { b20 = pset1<Packet>(Bc0[2]); }
|
||||
if(RK==4) { b30 = pset1<Packet>(Bc0[3]); }
|
||||
{ b01 = pset1<Packet>(Bc1[0]); }
|
||||
{ b11 = pset1<Packet>(Bc1[1]); }
|
||||
if(RK==4) { b21 = pset1<Packet>(Bc1[2]); }
|
||||
if(RK==4) { b31 = pset1<Packet>(Bc1[3]); }
|
||||
|
||||
Packet a0, a1, a2, a3, c0, c1, t0, t1;
|
||||
|
||||
const Scalar* A0 = A+ib+(k+0)*lda;
|
||||
const Scalar* A1 = A+ib+(k+1)*lda;
|
||||
const Scalar* A2 = A+ib+(k+2)*lda;
|
||||
const Scalar* A3 = A+ib+(k+3)*lda;
|
||||
|
||||
Scalar* C0 = C+ib+(j+0)*ldc;
|
||||
Scalar* C1 = C+ib+(j+1)*ldc;
|
||||
|
||||
a0 = pload<Packet>(A0);
|
||||
a1 = pload<Packet>(A1);
|
||||
if(RK==4)
|
||||
{
|
||||
a2 = pload<Packet>(A2);
|
||||
a3 = pload<Packet>(A3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// workaround "may be used uninitialized in this function" warning
|
||||
a2 = a3 = a0;
|
||||
}
|
||||
|
||||
#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
|
||||
#define WORK(I) \
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
KMADD(c1, a0, b01, t1) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
KMADD(c1, a1, b11, t1) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4){ KMADD(c0, a2, b20, t0) }\
|
||||
if(RK==4){ KMADD(c1, a2, b21, t1) }\
|
||||
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
|
||||
if(RK==4){ KMADD(c0, a3, b30, t0) }\
|
||||
if(RK==4){ KMADD(c1, a3, b31, t1) }\
|
||||
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
|
||||
pstore(C0+i+(I)*PacketSize, c0); \
|
||||
pstore(C1+i+(I)*PacketSize, c1)
|
||||
|
||||
// process rows of A' - C' with aggressive vectorization and peeling
|
||||
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
|
||||
{
|
||||
EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1");
|
||||
prefetch((A0+i+(5)*PacketSize));
|
||||
prefetch((A1+i+(5)*PacketSize));
|
||||
if(RK==4) prefetch((A2+i+(5)*PacketSize));
|
||||
if(RK==4) prefetch((A3+i+(5)*PacketSize));
|
||||
|
||||
WORK(0);
|
||||
WORK(1);
|
||||
WORK(2);
|
||||
WORK(3);
|
||||
WORK(4);
|
||||
WORK(5);
|
||||
WORK(6);
|
||||
WORK(7);
|
||||
}
|
||||
// process the remaining rows with vectorization only
|
||||
for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
|
||||
{
|
||||
WORK(0);
|
||||
}
|
||||
#undef WORK
|
||||
// process the remaining rows without vectorization
|
||||
for(Index i=actual_b_end2; i<actual_b; ++i)
|
||||
{
|
||||
if(RK==4)
|
||||
{
|
||||
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
|
||||
C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3];
|
||||
}
|
||||
else
|
||||
{
|
||||
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
|
||||
C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1];
|
||||
}
|
||||
}
|
||||
|
||||
Bc0 += RK;
|
||||
Bc1 += RK;
|
||||
} // peeled loop on k
|
||||
} // peeled loop on the columns j
|
||||
// process the last column (we now perform a matrix-vector product)
|
||||
if((n-n_end)>0)
|
||||
{
|
||||
const Scalar* Bc0 = B+(n-1)*ldb;
|
||||
|
||||
for(Index k=0; k<d_end; k+=RK)
|
||||
{
|
||||
|
||||
// load and expand a 1 x RK block of B
|
||||
Packet b00, b10, b20, b30;
|
||||
b00 = pset1<Packet>(Bc0[0]);
|
||||
b10 = pset1<Packet>(Bc0[1]);
|
||||
if(RK==4) b20 = pset1<Packet>(Bc0[2]);
|
||||
if(RK==4) b30 = pset1<Packet>(Bc0[3]);
|
||||
|
||||
Packet a0, a1, a2, a3, c0, t0/*, t1*/;
|
||||
|
||||
const Scalar* A0 = A+ib+(k+0)*lda;
|
||||
const Scalar* A1 = A+ib+(k+1)*lda;
|
||||
const Scalar* A2 = A+ib+(k+2)*lda;
|
||||
const Scalar* A3 = A+ib+(k+3)*lda;
|
||||
|
||||
Scalar* C0 = C+ib+(n_end)*ldc;
|
||||
|
||||
a0 = pload<Packet>(A0);
|
||||
a1 = pload<Packet>(A1);
|
||||
if(RK==4)
|
||||
{
|
||||
a2 = pload<Packet>(A2);
|
||||
a3 = pload<Packet>(A3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// workaround "may be used uninitialized in this function" warning
|
||||
a2 = a3 = a0;
|
||||
}
|
||||
|
||||
#define WORK(I) \
|
||||
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
|
||||
KMADD(c0, a0, b00, t0) \
|
||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||
KMADD(c0, a1, b10, t0) \
|
||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||
if(RK==4){ KMADD(c0, a2, b20, t0) }\
|
||||
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
|
||||
if(RK==4){ KMADD(c0, a3, b30, t0) }\
|
||||
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
|
||||
pstore(C0+i+(I)*PacketSize, c0);
|
||||
|
||||
// agressive vectorization and peeling
|
||||
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
|
||||
{
|
||||
EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2");
|
||||
WORK(0);
|
||||
WORK(1);
|
||||
WORK(2);
|
||||
WORK(3);
|
||||
WORK(4);
|
||||
WORK(5);
|
||||
WORK(6);
|
||||
WORK(7);
|
||||
}
|
||||
// vectorization only
|
||||
for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
|
||||
{
|
||||
WORK(0);
|
||||
}
|
||||
// remaining scalars
|
||||
for(Index i=actual_b_end2; i<actual_b; ++i)
|
||||
{
|
||||
if(RK==4)
|
||||
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
|
||||
else
|
||||
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
|
||||
}
|
||||
|
||||
Bc0 += RK;
|
||||
#undef WORK
|
||||
}
|
||||
}
|
||||
|
||||
// process the last columns of A, corresponding to the last rows of B
|
||||
Index rd = d-d_end;
|
||||
if(rd>0)
|
||||
{
|
||||
for(Index j=0; j<n; ++j)
|
||||
{
|
||||
enum {
|
||||
Alignment = PacketSize>1 ? Aligned : 0
|
||||
};
|
||||
typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector;
|
||||
typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector;
|
||||
if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b);
|
||||
|
||||
else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
|
||||
+ B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b);
|
||||
|
||||
else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
|
||||
+ B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b)
|
||||
+ B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b);
|
||||
}
|
||||
}
|
||||
|
||||
} // blocking on the rows of A and C
|
||||
}
|
||||
#undef KMADD
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_SPARSELU_GEMM_KERNEL_H
|
||||
@@ -0,0 +1,126 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/* This file is a modified version of heap_relax_snode.c file in SuperLU
|
||||
* -- SuperLU routine (version 3.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* October 15, 2003
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
|
||||
#ifndef SPARSELU_HEAP_RELAX_SNODE_H
|
||||
#define SPARSELU_HEAP_RELAX_SNODE_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/**
|
||||
* \brief Identify the initial relaxed supernodes
|
||||
*
|
||||
* This routine applied to a symmetric elimination tree.
|
||||
* It assumes that the matrix has been reordered according to the postorder of the etree
|
||||
* \param n The number of columns
|
||||
* \param et elimination tree
|
||||
* \param relax_columns Maximum number of columns allowed in a relaxed snode
|
||||
* \param descendants Number of descendants of each node in the etree
|
||||
* \param relax_end last column in a supernode
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
void SparseLUImpl<Scalar,StorageIndex>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
|
||||
{
|
||||
|
||||
// The etree may not be postordered, but its heap ordered
|
||||
IndexVector post;
|
||||
internal::treePostorder(StorageIndex(n), et, post); // Post order etree
|
||||
IndexVector inv_post(n+1);
|
||||
for (StorageIndex i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
|
||||
|
||||
// Renumber etree in postorder
|
||||
IndexVector iwork(n);
|
||||
IndexVector et_save(n+1);
|
||||
for (Index i = 0; i < n; ++i)
|
||||
{
|
||||
iwork(post(i)) = post(et(i));
|
||||
}
|
||||
et_save = et; // Save the original etree
|
||||
et = iwork;
|
||||
|
||||
// compute the number of descendants of each node in the etree
|
||||
relax_end.setConstant(emptyIdxLU);
|
||||
Index j, parent;
|
||||
descendants.setZero();
|
||||
for (j = 0; j < n; j++)
|
||||
{
|
||||
parent = et(j);
|
||||
if (parent != n) // not the dummy root
|
||||
descendants(parent) += descendants(j) + 1;
|
||||
}
|
||||
// Identify the relaxed supernodes by postorder traversal of the etree
|
||||
Index snode_start; // beginning of a snode
|
||||
StorageIndex k;
|
||||
Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
|
||||
Index nsuper_et = 0; // Number of relaxed snodes in the original etree
|
||||
StorageIndex l;
|
||||
for (j = 0; j < n; )
|
||||
{
|
||||
parent = et(j);
|
||||
snode_start = j;
|
||||
while ( parent != n && descendants(parent) < relax_columns )
|
||||
{
|
||||
j = parent;
|
||||
parent = et(j);
|
||||
}
|
||||
// Found a supernode in postordered etree, j is the last column
|
||||
++nsuper_et_post;
|
||||
k = StorageIndex(n);
|
||||
for (Index i = snode_start; i <= j; ++i)
|
||||
k = (std::min)(k, inv_post(i));
|
||||
l = inv_post(j);
|
||||
if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode
|
||||
{
|
||||
// This is also a supernode in the original etree
|
||||
relax_end(k) = l; // Record last column
|
||||
++nsuper_et;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (Index i = snode_start; i <= j; ++i)
|
||||
{
|
||||
l = inv_post(i);
|
||||
if (descendants(i) == 0)
|
||||
{
|
||||
relax_end(l) = l;
|
||||
++nsuper_et;
|
||||
}
|
||||
}
|
||||
}
|
||||
j++;
|
||||
// Search for a new leaf
|
||||
while (descendants(j) != 0 && j < n) j++;
|
||||
} // End postorder traversal of the etree
|
||||
|
||||
// Recover the original etree
|
||||
et = et_save;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
#endif // SPARSELU_HEAP_RELAX_SNODE_H
|
||||
@@ -0,0 +1,130 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef SPARSELU_KERNEL_BMOD_H
|
||||
#define SPARSELU_KERNEL_BMOD_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
|
||||
{
|
||||
/** \internal
|
||||
* \brief Performs numeric block updates from a given supernode to a single column
|
||||
*
|
||||
* \param segsize Size of the segment (and blocks ) to use for updates
|
||||
* \param[in,out] dense Packed values of the original matrix
|
||||
* \param tempv temporary vector to use for updates
|
||||
* \param lusup array containing the supernodes
|
||||
* \param lda Leading dimension in the supernode
|
||||
* \param nrow Number of rows in the rectangular part of the supernode
|
||||
* \param lsub compressed row subscripts of supernodes
|
||||
* \param lptr pointer to the first column of the current supernode in lsub
|
||||
* \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
|
||||
*/
|
||||
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
|
||||
static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
|
||||
const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
|
||||
};
|
||||
|
||||
template <int SegSizeAtCompileTime>
|
||||
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
|
||||
EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
|
||||
const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
|
||||
{
|
||||
typedef typename ScalarVector::Scalar Scalar;
|
||||
// First, copy U[*,j] segment from dense(*) to tempv(*)
|
||||
// The result of triangular solve is in tempv[*];
|
||||
// The result of matric-vector update is in dense[*]
|
||||
Index isub = lptr + no_zeros;
|
||||
Index i;
|
||||
Index irow;
|
||||
for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
|
||||
{
|
||||
irow = lsub(isub);
|
||||
tempv(i) = dense(irow);
|
||||
++isub;
|
||||
}
|
||||
// Dense triangular solve -- start effective triangle
|
||||
luptr += lda * no_zeros + no_zeros;
|
||||
// Form Eigen matrix and vector
|
||||
Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
|
||||
Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
|
||||
|
||||
u = A.template triangularView<UnitLower>().solve(u);
|
||||
|
||||
// Dense matrix-vector product y <-- B*x
|
||||
luptr += segsize;
|
||||
const Index PacketSize = internal::packet_traits<Scalar>::size;
|
||||
Index ldl = internal::first_multiple(nrow, PacketSize);
|
||||
Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
|
||||
Index aligned_offset = internal::first_default_aligned(tempv.data()+segsize, PacketSize);
|
||||
Index aligned_with_B_offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize))%PacketSize;
|
||||
Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
|
||||
|
||||
l.setZero();
|
||||
internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
|
||||
|
||||
// Scatter tempv[] into SPA dense[] as a temporary storage
|
||||
isub = lptr + no_zeros;
|
||||
for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
|
||||
{
|
||||
irow = lsub(isub++);
|
||||
dense(irow) = tempv(i);
|
||||
}
|
||||
|
||||
// Scatter l into SPA dense[]
|
||||
for (i = 0; i < nrow; i++)
|
||||
{
|
||||
irow = lsub(isub++);
|
||||
dense(irow) -= l(i);
|
||||
}
|
||||
}
|
||||
|
||||
template <> struct LU_kernel_bmod<1>
|
||||
{
|
||||
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
|
||||
static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
|
||||
const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
|
||||
};
|
||||
|
||||
|
||||
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
|
||||
EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
|
||||
const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
|
||||
{
|
||||
typedef typename ScalarVector::Scalar Scalar;
|
||||
typedef typename IndexVector::Scalar StorageIndex;
|
||||
Scalar f = dense(lsub(lptr + no_zeros));
|
||||
luptr += lda * no_zeros + no_zeros + 1;
|
||||
const Scalar* a(lusup.data() + luptr);
|
||||
const StorageIndex* irow(lsub.data()+lptr + no_zeros + 1);
|
||||
Index i = 0;
|
||||
for (; i+1 < nrow; i+=2)
|
||||
{
|
||||
Index i0 = *(irow++);
|
||||
Index i1 = *(irow++);
|
||||
Scalar a0 = *(a++);
|
||||
Scalar a1 = *(a++);
|
||||
Scalar d0 = dense.coeff(i0);
|
||||
Scalar d1 = dense.coeff(i1);
|
||||
d0 -= f*a0;
|
||||
d1 -= f*a1;
|
||||
dense.coeffRef(i0) = d0;
|
||||
dense.coeffRef(i1) = d1;
|
||||
}
|
||||
if(i<nrow)
|
||||
dense.coeffRef(*(irow++)) -= f * *(a++);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
#endif // SPARSELU_KERNEL_BMOD_H
|
||||
223
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_panel_bmod.h
Normal file
223
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_panel_bmod.h
Normal file
@@ -0,0 +1,223 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 3.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* October 15, 2003
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_PANEL_BMOD_H
|
||||
#define SPARSELU_PANEL_BMOD_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/**
|
||||
* \brief Performs numeric block updates (sup-panel) in topological order.
|
||||
*
|
||||
* Before entering this routine, the original nonzeros in the panel
|
||||
* were already copied i nto the spa[m,w]
|
||||
*
|
||||
* \param m number of rows in the matrix
|
||||
* \param w Panel size
|
||||
* \param jcol Starting column of the panel
|
||||
* \param nseg Number of segments in the U part
|
||||
* \param dense Store the full representation of the panel
|
||||
* \param tempv working array
|
||||
* \param segrep segment representative... first row in the segment
|
||||
* \param repfnz First nonzero rows
|
||||
* \param glu Global LU data.
|
||||
*
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
void SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w, const Index jcol,
|
||||
const Index nseg, ScalarVector& dense, ScalarVector& tempv,
|
||||
IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
|
||||
{
|
||||
|
||||
Index ksub,jj,nextl_col;
|
||||
Index fsupc, nsupc, nsupr, nrow;
|
||||
Index krep, kfnz;
|
||||
Index lptr; // points to the row subscripts of a supernode
|
||||
Index luptr; // ...
|
||||
Index segsize,no_zeros ;
|
||||
// For each nonz supernode segment of U[*,j] in topological order
|
||||
Index k = nseg - 1;
|
||||
const Index PacketSize = internal::packet_traits<Scalar>::size;
|
||||
|
||||
for (ksub = 0; ksub < nseg; ksub++)
|
||||
{ // For each updating supernode
|
||||
/* krep = representative of current k-th supernode
|
||||
* fsupc = first supernodal column
|
||||
* nsupc = number of columns in a supernode
|
||||
* nsupr = number of rows in a supernode
|
||||
*/
|
||||
krep = segrep(k); k--;
|
||||
fsupc = glu.xsup(glu.supno(krep));
|
||||
nsupc = krep - fsupc + 1;
|
||||
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
|
||||
nrow = nsupr - nsupc;
|
||||
lptr = glu.xlsub(fsupc);
|
||||
|
||||
// loop over the panel columns to detect the actual number of columns and rows
|
||||
Index u_rows = 0;
|
||||
Index u_cols = 0;
|
||||
for (jj = jcol; jj < jcol + w; jj++)
|
||||
{
|
||||
nextl_col = (jj-jcol) * m;
|
||||
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
|
||||
|
||||
kfnz = repfnz_col(krep);
|
||||
if ( kfnz == emptyIdxLU )
|
||||
continue; // skip any zero segment
|
||||
|
||||
segsize = krep - kfnz + 1;
|
||||
u_cols++;
|
||||
u_rows = (std::max)(segsize,u_rows);
|
||||
}
|
||||
|
||||
if(nsupc >= 2)
|
||||
{
|
||||
Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
|
||||
Map<ScalarMatrix, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
|
||||
|
||||
// gather U
|
||||
Index u_col = 0;
|
||||
for (jj = jcol; jj < jcol + w; jj++)
|
||||
{
|
||||
nextl_col = (jj-jcol) * m;
|
||||
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
|
||||
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
|
||||
|
||||
kfnz = repfnz_col(krep);
|
||||
if ( kfnz == emptyIdxLU )
|
||||
continue; // skip any zero segment
|
||||
|
||||
segsize = krep - kfnz + 1;
|
||||
luptr = glu.xlusup(fsupc);
|
||||
no_zeros = kfnz - fsupc;
|
||||
|
||||
Index isub = lptr + no_zeros;
|
||||
Index off = u_rows-segsize;
|
||||
for (Index i = 0; i < off; i++) U(i,u_col) = 0;
|
||||
for (Index i = 0; i < segsize; i++)
|
||||
{
|
||||
Index irow = glu.lsub(isub);
|
||||
U(i+off,u_col) = dense_col(irow);
|
||||
++isub;
|
||||
}
|
||||
u_col++;
|
||||
}
|
||||
// solve U = A^-1 U
|
||||
luptr = glu.xlusup(fsupc);
|
||||
Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
|
||||
no_zeros = (krep - u_rows + 1) - fsupc;
|
||||
luptr += lda * no_zeros + no_zeros;
|
||||
MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
|
||||
U = A.template triangularView<UnitLower>().solve(U);
|
||||
|
||||
// update
|
||||
luptr += u_rows;
|
||||
MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
|
||||
eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
|
||||
|
||||
Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
|
||||
Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
|
||||
MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
|
||||
|
||||
L.setZero();
|
||||
internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());
|
||||
|
||||
// scatter U and L
|
||||
u_col = 0;
|
||||
for (jj = jcol; jj < jcol + w; jj++)
|
||||
{
|
||||
nextl_col = (jj-jcol) * m;
|
||||
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
|
||||
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
|
||||
|
||||
kfnz = repfnz_col(krep);
|
||||
if ( kfnz == emptyIdxLU )
|
||||
continue; // skip any zero segment
|
||||
|
||||
segsize = krep - kfnz + 1;
|
||||
no_zeros = kfnz - fsupc;
|
||||
Index isub = lptr + no_zeros;
|
||||
|
||||
Index off = u_rows-segsize;
|
||||
for (Index i = 0; i < segsize; i++)
|
||||
{
|
||||
Index irow = glu.lsub(isub++);
|
||||
dense_col(irow) = U.coeff(i+off,u_col);
|
||||
U.coeffRef(i+off,u_col) = 0;
|
||||
}
|
||||
|
||||
// Scatter l into SPA dense[]
|
||||
for (Index i = 0; i < nrow; i++)
|
||||
{
|
||||
Index irow = glu.lsub(isub++);
|
||||
dense_col(irow) -= L.coeff(i,u_col);
|
||||
L.coeffRef(i,u_col) = 0;
|
||||
}
|
||||
u_col++;
|
||||
}
|
||||
}
|
||||
else // level 2 only
|
||||
{
|
||||
// Sequence through each column in the panel
|
||||
for (jj = jcol; jj < jcol + w; jj++)
|
||||
{
|
||||
nextl_col = (jj-jcol) * m;
|
||||
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
|
||||
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
|
||||
|
||||
kfnz = repfnz_col(krep);
|
||||
if ( kfnz == emptyIdxLU )
|
||||
continue; // skip any zero segment
|
||||
|
||||
segsize = krep - kfnz + 1;
|
||||
luptr = glu.xlusup(fsupc);
|
||||
|
||||
Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
|
||||
|
||||
// Perform a trianglar solve and block update,
|
||||
// then scatter the result of sup-col update to dense[]
|
||||
no_zeros = kfnz - fsupc;
|
||||
if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
|
||||
else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
|
||||
else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
|
||||
else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
|
||||
} // End for each column in the panel
|
||||
}
|
||||
|
||||
} // End for each updating supernode
|
||||
} // end panel bmod
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // SPARSELU_PANEL_BMOD_H
|
||||
258
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_panel_dfs.h
Normal file
258
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_panel_dfs.h
Normal file
@@ -0,0 +1,258 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 2.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* November 15, 1997
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_PANEL_DFS_H
|
||||
#define SPARSELU_PANEL_DFS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename IndexVector>
|
||||
struct panel_dfs_traits
|
||||
{
|
||||
typedef typename IndexVector::Scalar StorageIndex;
|
||||
panel_dfs_traits(Index jcol, StorageIndex* marker)
|
||||
: m_jcol(jcol), m_marker(marker)
|
||||
{}
|
||||
bool update_segrep(Index krep, StorageIndex jj)
|
||||
{
|
||||
if(m_marker[krep]<m_jcol)
|
||||
{
|
||||
m_marker[krep] = jj;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
void mem_expand(IndexVector& /*glu.lsub*/, Index /*nextl*/, Index /*chmark*/) {}
|
||||
enum { ExpandMem = false };
|
||||
Index m_jcol;
|
||||
StorageIndex* m_marker;
|
||||
};
|
||||
|
||||
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
template <typename Traits>
|
||||
void SparseLUImpl<Scalar,StorageIndex>::dfs_kernel(const StorageIndex jj, IndexVector& perm_r,
|
||||
Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
|
||||
Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
|
||||
IndexVector& xplore, GlobalLU_t& glu,
|
||||
Index& nextl_col, Index krow, Traits& traits
|
||||
)
|
||||
{
|
||||
|
||||
StorageIndex kmark = marker(krow);
|
||||
|
||||
// For each unmarked krow of jj
|
||||
marker(krow) = jj;
|
||||
StorageIndex kperm = perm_r(krow);
|
||||
if (kperm == emptyIdxLU ) {
|
||||
// krow is in L : place it in structure of L(*, jj)
|
||||
panel_lsub(nextl_col++) = StorageIndex(krow); // krow is indexed into A
|
||||
|
||||
traits.mem_expand(panel_lsub, nextl_col, kmark);
|
||||
}
|
||||
else
|
||||
{
|
||||
// krow is in U : if its supernode-representative krep
|
||||
// has been explored, update repfnz(*)
|
||||
// krep = supernode representative of the current row
|
||||
StorageIndex krep = glu.xsup(glu.supno(kperm)+1) - 1;
|
||||
// First nonzero element in the current column:
|
||||
StorageIndex myfnz = repfnz_col(krep);
|
||||
|
||||
if (myfnz != emptyIdxLU )
|
||||
{
|
||||
// Representative visited before
|
||||
if (myfnz > kperm ) repfnz_col(krep) = kperm;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// Otherwise, perform dfs starting at krep
|
||||
StorageIndex oldrep = emptyIdxLU;
|
||||
parent(krep) = oldrep;
|
||||
repfnz_col(krep) = kperm;
|
||||
StorageIndex xdfs = glu.xlsub(krep);
|
||||
Index maxdfs = xprune(krep);
|
||||
|
||||
StorageIndex kpar;
|
||||
do
|
||||
{
|
||||
// For each unmarked kchild of krep
|
||||
while (xdfs < maxdfs)
|
||||
{
|
||||
StorageIndex kchild = glu.lsub(xdfs);
|
||||
xdfs++;
|
||||
StorageIndex chmark = marker(kchild);
|
||||
|
||||
if (chmark != jj )
|
||||
{
|
||||
marker(kchild) = jj;
|
||||
StorageIndex chperm = perm_r(kchild);
|
||||
|
||||
if (chperm == emptyIdxLU)
|
||||
{
|
||||
// case kchild is in L: place it in L(*, j)
|
||||
panel_lsub(nextl_col++) = kchild;
|
||||
traits.mem_expand(panel_lsub, nextl_col, chmark);
|
||||
}
|
||||
else
|
||||
{
|
||||
// case kchild is in U :
|
||||
// chrep = its supernode-rep. If its rep has been explored,
|
||||
// update its repfnz(*)
|
||||
StorageIndex chrep = glu.xsup(glu.supno(chperm)+1) - 1;
|
||||
myfnz = repfnz_col(chrep);
|
||||
|
||||
if (myfnz != emptyIdxLU)
|
||||
{ // Visited before
|
||||
if (myfnz > chperm)
|
||||
repfnz_col(chrep) = chperm;
|
||||
}
|
||||
else
|
||||
{ // Cont. dfs at snode-rep of kchild
|
||||
xplore(krep) = xdfs;
|
||||
oldrep = krep;
|
||||
krep = chrep; // Go deeper down G(L)
|
||||
parent(krep) = oldrep;
|
||||
repfnz_col(krep) = chperm;
|
||||
xdfs = glu.xlsub(krep);
|
||||
maxdfs = xprune(krep);
|
||||
|
||||
} // end if myfnz != -1
|
||||
} // end if chperm == -1
|
||||
|
||||
} // end if chmark !=jj
|
||||
} // end while xdfs < maxdfs
|
||||
|
||||
// krow has no more unexplored nbrs :
|
||||
// Place snode-rep krep in postorder DFS, if this
|
||||
// segment is seen for the first time. (Note that
|
||||
// "repfnz(krep)" may change later.)
|
||||
// Baktrack dfs to its parent
|
||||
if(traits.update_segrep(krep,jj))
|
||||
//if (marker1(krep) < jcol )
|
||||
{
|
||||
segrep(nseg) = krep;
|
||||
++nseg;
|
||||
//marker1(krep) = jj;
|
||||
}
|
||||
|
||||
kpar = parent(krep); // Pop recursion, mimic recursion
|
||||
if (kpar == emptyIdxLU)
|
||||
break; // dfs done
|
||||
krep = kpar;
|
||||
xdfs = xplore(krep);
|
||||
maxdfs = xprune(krep);
|
||||
|
||||
} while (kpar != emptyIdxLU); // Do until empty stack
|
||||
|
||||
} // end if (myfnz = -1)
|
||||
|
||||
} // end if (kperm == -1)
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w)
|
||||
*
|
||||
* A supernode representative is the last column of a supernode.
|
||||
* The nonzeros in U[*,j] are segments that end at supernodes representatives
|
||||
*
|
||||
* The routine returns a list of the supernodal representatives
|
||||
* in topological order of the dfs that generates them. This list is
|
||||
* a superset of the topological order of each individual column within
|
||||
* the panel.
|
||||
* The location of the first nonzero in each supernodal segment
|
||||
* (supernodal entry location) is also returned. Each column has
|
||||
* a separate list for this purpose.
|
||||
*
|
||||
* Two markers arrays are used for dfs :
|
||||
* marker[i] == jj, if i was visited during dfs of current column jj;
|
||||
* marker1[i] >= jcol, if i was visited by earlier columns in this panel;
|
||||
*
|
||||
* \param[in] m number of rows in the matrix
|
||||
* \param[in] w Panel size
|
||||
* \param[in] jcol Starting column of the panel
|
||||
* \param[in] A Input matrix in column-major storage
|
||||
* \param[in] perm_r Row permutation
|
||||
* \param[out] nseg Number of U segments
|
||||
* \param[out] dense Accumulate the column vectors of the panel
|
||||
* \param[out] panel_lsub Subscripts of the row in the panel
|
||||
* \param[out] segrep Segment representative i.e first nonzero row of each segment
|
||||
* \param[out] repfnz First nonzero location in each row
|
||||
* \param[out] xprune The pruned elimination tree
|
||||
* \param[out] marker work vector
|
||||
* \param parent The elimination tree
|
||||
* \param xplore work vector
|
||||
* \param glu The global data structure
|
||||
*
|
||||
*/
|
||||
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
void SparseLUImpl<Scalar,StorageIndex>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
|
||||
{
|
||||
Index nextl_col; // Next available position in panel_lsub[*,jj]
|
||||
|
||||
// Initialize pointers
|
||||
VectorBlock<IndexVector> marker1(marker, m, m);
|
||||
nseg = 0;
|
||||
|
||||
panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
|
||||
|
||||
// For each column in the panel
|
||||
for (StorageIndex jj = StorageIndex(jcol); jj < jcol + w; jj++)
|
||||
{
|
||||
nextl_col = (jj - jcol) * m;
|
||||
|
||||
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row
|
||||
VectorBlock<ScalarVector> dense_col(dense,nextl_col, m); // Accumulate a column vector here
|
||||
|
||||
|
||||
// For each nnz in A[*, jj] do depth first search
|
||||
for (typename MatrixType::InnerIterator it(A, jj); it; ++it)
|
||||
{
|
||||
Index krow = it.row();
|
||||
dense_col(krow) = it.value();
|
||||
|
||||
StorageIndex kmark = marker(krow);
|
||||
if (kmark == jj)
|
||||
continue; // krow visited before, go to the next nonzero
|
||||
|
||||
dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,
|
||||
xplore, glu, nextl_col, krow, traits);
|
||||
}// end for nonzeros in column jj
|
||||
|
||||
} // end for column jj
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // SPARSELU_PANEL_DFS_H
|
||||
137
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_pivotL.h
Normal file
137
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_pivotL.h
Normal file
@@ -0,0 +1,137 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of xpivotL.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 3.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* October 15, 2003
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_PIVOTL_H
|
||||
#define SPARSELU_PIVOTL_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/**
|
||||
* \brief Performs the numerical pivotin on the current column of L, and the CDIV operation.
|
||||
*
|
||||
* Pivot policy :
|
||||
* (1) Compute thresh = u * max_(i>=j) abs(A_ij);
|
||||
* (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
|
||||
* pivot row = k;
|
||||
* ELSE IF abs(A_jj) >= thresh THEN
|
||||
* pivot row = j;
|
||||
* ELSE
|
||||
* pivot row = m;
|
||||
*
|
||||
* Note: If you absolutely want to use a given pivot order, then set u=0.0.
|
||||
*
|
||||
* \param jcol The current column of L
|
||||
* \param diagpivotthresh diagonal pivoting threshold
|
||||
* \param[in,out] perm_r Row permutation (threshold pivoting)
|
||||
* \param[in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc'
|
||||
* \param[out] pivrow The pivot row
|
||||
* \param glu Global LU data
|
||||
* \return 0 if success, i > 0 if U(i,i) is exactly zero
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
Index SparseLUImpl<Scalar,StorageIndex>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu)
|
||||
{
|
||||
|
||||
Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol
|
||||
Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0
|
||||
Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion
|
||||
Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode
|
||||
Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); // leading dimension
|
||||
Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode
|
||||
Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode
|
||||
StorageIndex* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode
|
||||
|
||||
// Determine the largest abs numerical value for partial pivoting
|
||||
Index diagind = iperm_c(jcol); // diagonal index
|
||||
RealScalar pivmax(-1.0);
|
||||
Index pivptr = nsupc;
|
||||
Index diag = emptyIdxLU;
|
||||
RealScalar rtemp;
|
||||
Index isub, icol, itemp, k;
|
||||
for (isub = nsupc; isub < nsupr; ++isub) {
|
||||
using std::abs;
|
||||
rtemp = abs(lu_col_ptr[isub]);
|
||||
if (rtemp > pivmax) {
|
||||
pivmax = rtemp;
|
||||
pivptr = isub;
|
||||
}
|
||||
if (lsub_ptr[isub] == diagind) diag = isub;
|
||||
}
|
||||
|
||||
// Test for singularity
|
||||
if ( pivmax <= RealScalar(0.0) ) {
|
||||
// if pivmax == -1, the column is structurally empty, otherwise it is only numerically zero
|
||||
pivrow = pivmax < RealScalar(0.0) ? diagind : lsub_ptr[pivptr];
|
||||
perm_r(pivrow) = StorageIndex(jcol);
|
||||
return (jcol+1);
|
||||
}
|
||||
|
||||
RealScalar thresh = diagpivotthresh * pivmax;
|
||||
|
||||
// Choose appropriate pivotal element
|
||||
|
||||
{
|
||||
// Test if the diagonal element can be used as a pivot (given the threshold value)
|
||||
if (diag >= 0 )
|
||||
{
|
||||
// Diagonal element exists
|
||||
using std::abs;
|
||||
rtemp = abs(lu_col_ptr[diag]);
|
||||
if (rtemp != RealScalar(0.0) && rtemp >= thresh) pivptr = diag;
|
||||
}
|
||||
pivrow = lsub_ptr[pivptr];
|
||||
}
|
||||
|
||||
// Record pivot row
|
||||
perm_r(pivrow) = StorageIndex(jcol);
|
||||
// Interchange row subscripts
|
||||
if (pivptr != nsupc )
|
||||
{
|
||||
std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] );
|
||||
// Interchange numerical values as well, for the two rows in the whole snode
|
||||
// such that L is indexed the same way as A
|
||||
for (icol = 0; icol <= nsupc; icol++)
|
||||
{
|
||||
itemp = pivptr + icol * lda;
|
||||
std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * lda]);
|
||||
}
|
||||
}
|
||||
// cdiv operations
|
||||
Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc];
|
||||
for (k = nsupc+1; k < nsupr; k++)
|
||||
lu_col_ptr[k] *= temp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // SPARSELU_PIVOTL_H
|
||||
136
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_pruneL.h
Normal file
136
examples/ThirdPartyLibs/Eigen/src/SparseLU/SparseLU_pruneL.h
Normal file
@@ -0,0 +1,136 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 2.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* November 15, 1997
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_PRUNEL_H
|
||||
#define SPARSELU_PRUNEL_H
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/**
|
||||
* \brief Prunes the L-structure.
|
||||
*
|
||||
* It prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow"
|
||||
*
|
||||
*
|
||||
* \param jcol The current column of L
|
||||
* \param[in] perm_r Row permutation
|
||||
* \param[out] pivrow The pivot row
|
||||
* \param nseg Number of segments
|
||||
* \param segrep
|
||||
* \param repfnz
|
||||
* \param[out] xprune
|
||||
* \param glu Global LU data
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
void SparseLUImpl<Scalar,StorageIndex>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg,
|
||||
const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
|
||||
{
|
||||
// For each supernode-rep irep in U(*,j]
|
||||
Index jsupno = glu.supno(jcol);
|
||||
Index i,irep,irep1;
|
||||
bool movnum, do_prune = false;
|
||||
Index kmin = 0, kmax = 0, minloc, maxloc,krow;
|
||||
for (i = 0; i < nseg; i++)
|
||||
{
|
||||
irep = segrep(i);
|
||||
irep1 = irep + 1;
|
||||
do_prune = false;
|
||||
|
||||
// Don't prune with a zero U-segment
|
||||
if (repfnz(irep) == emptyIdxLU) continue;
|
||||
|
||||
// If a snode overlaps with the next panel, then the U-segment
|
||||
// is fragmented into two parts -- irep and irep1. We should let
|
||||
// pruning occur at the rep-column in irep1s snode.
|
||||
if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune
|
||||
|
||||
// If it has not been pruned & it has a nonz in row L(pivrow,i)
|
||||
if (glu.supno(irep) != jsupno )
|
||||
{
|
||||
if ( xprune (irep) >= glu.xlsub(irep1) )
|
||||
{
|
||||
kmin = glu.xlsub(irep);
|
||||
kmax = glu.xlsub(irep1) - 1;
|
||||
for (krow = kmin; krow <= kmax; krow++)
|
||||
{
|
||||
if (glu.lsub(krow) == pivrow)
|
||||
{
|
||||
do_prune = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (do_prune)
|
||||
{
|
||||
// do a quicksort-type partition
|
||||
// movnum=true means that the num values have to be exchanged
|
||||
movnum = false;
|
||||
if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1
|
||||
movnum = true;
|
||||
|
||||
while (kmin <= kmax)
|
||||
{
|
||||
if (perm_r(glu.lsub(kmax)) == emptyIdxLU)
|
||||
kmax--;
|
||||
else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU)
|
||||
kmin++;
|
||||
else
|
||||
{
|
||||
// kmin below pivrow (not yet pivoted), and kmax
|
||||
// above pivrow: interchange the two suscripts
|
||||
std::swap(glu.lsub(kmin), glu.lsub(kmax));
|
||||
|
||||
// If the supernode has only one column, then we
|
||||
// only keep one set of subscripts. For any subscript
|
||||
// intercnahge performed, similar interchange must be
|
||||
// done on the numerical values.
|
||||
if (movnum)
|
||||
{
|
||||
minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) );
|
||||
maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) );
|
||||
std::swap(glu.lusup(minloc), glu.lusup(maxloc));
|
||||
}
|
||||
kmin++;
|
||||
kmax--;
|
||||
}
|
||||
} // end while
|
||||
|
||||
xprune(irep) = StorageIndex(kmin); //Pruning
|
||||
} // end if do_prune
|
||||
} // end pruning
|
||||
} // End for each U-segment
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // SPARSELU_PRUNEL_H
|
||||
@@ -0,0 +1,83 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/* This file is a modified version of heap_relax_snode.c file in SuperLU
|
||||
* -- SuperLU routine (version 3.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* October 15, 2003
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
|
||||
#ifndef SPARSELU_RELAX_SNODE_H
|
||||
#define SPARSELU_RELAX_SNODE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/**
|
||||
* \brief Identify the initial relaxed supernodes
|
||||
*
|
||||
* This routine is applied to a column elimination tree.
|
||||
* It assumes that the matrix has been reordered according to the postorder of the etree
|
||||
* \param n the number of columns
|
||||
* \param et elimination tree
|
||||
* \param relax_columns Maximum number of columns allowed in a relaxed snode
|
||||
* \param descendants Number of descendants of each node in the etree
|
||||
* \param relax_end last column in a supernode
|
||||
*/
|
||||
template <typename Scalar, typename StorageIndex>
|
||||
void SparseLUImpl<Scalar,StorageIndex>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
|
||||
{
|
||||
|
||||
// compute the number of descendants of each node in the etree
|
||||
Index parent;
|
||||
relax_end.setConstant(emptyIdxLU);
|
||||
descendants.setZero();
|
||||
for (Index j = 0; j < n; j++)
|
||||
{
|
||||
parent = et(j);
|
||||
if (parent != n) // not the dummy root
|
||||
descendants(parent) += descendants(j) + 1;
|
||||
}
|
||||
// Identify the relaxed supernodes by postorder traversal of the etree
|
||||
Index snode_start; // beginning of a snode
|
||||
for (Index j = 0; j < n; )
|
||||
{
|
||||
parent = et(j);
|
||||
snode_start = j;
|
||||
while ( parent != n && descendants(parent) < relax_columns )
|
||||
{
|
||||
j = parent;
|
||||
parent = et(j);
|
||||
}
|
||||
// Found a supernode in postordered etree, j is the last column
|
||||
relax_end(snode_start) = StorageIndex(j); // Record last column
|
||||
j++;
|
||||
// Search for a new leaf
|
||||
while (descendants(j) != 0 && j < n) j++;
|
||||
} // End postorder traversal of the etree
|
||||
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
#endif
|
||||
Reference in New Issue
Block a user