Add preliminary PhysX 4.0 backend for PyBullet

Add inverse dynamics / mass matrix code from DeepMimic, thanks to Xue Bin (Jason) Peng
Add an example showing how to use stable PD control for a humanoid with spherical joints (see humanoidMotionCapture.py)
Fix an issue where TinyRenderer object transforms were not updated when using collision filtering
erwincoumans
2019-01-22 21:08:37 -08:00
parent 80684f44ea
commit ae8e83988b
366 changed files with 131855 additions and 359 deletions


@@ -0,0 +1,775 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_SPARSE_LU_H
#define EIGEN_SPARSE_LU_H
namespace Eigen {
template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::StorageIndex> > class SparseLU;
template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;
template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType;
/** \ingroup SparseLU_Module
* \class SparseLU
*
* \brief Sparse supernodal LU factorization for general matrices
*
* This class implements the supernodal LU factorization for general matrices.
* It uses the main techniques from the sequential SuperLU package
* (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real
* and complex arithmetics with single and double precision, depending on the
* scalar type of your input matrix.
* The code has been optimized to provide BLAS-3 operations during supernode-panel updates.
* It benefits directly from the built-in high-performance Eigen BLAS routines.
* Moreover, when the size of a supernode is very small, the BLAS calls are avoided
* to enable better optimization by the compiler. For best performance,
* you should compile with the NDEBUG flag to avoid the numerous bounds checks on vectors.
*
* An important parameter of this class is the ordering method. It is used to reorder the columns
* (and possibly the rows) of the matrix to reduce the number of new elements (fill-in) created during
* numerical factorization. The cheapest method available is COLAMD.
* See \link OrderingMethods_Module the OrderingMethods module \endlink for the list of
* built-in and external ordering methods.
*
* Simple example with key steps
* \code
* VectorXd x(n), b(n);
* SparseMatrix<double, ColMajor> A;
* SparseLU<SparseMatrix<double, ColMajor>, COLAMDOrdering<int> > solver;
* // fill A and b;
* // Compute the ordering permutation vector from the structural pattern of A
* solver.analyzePattern(A);
* // Compute the numerical factorization
* solver.factorize(A);
* //Use the factors to solve the linear system
* x = solver.solve(b);
* \endcode
*
* \warning The input matrix A should be in a \b compressed and \b column-major form.
* Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.
*
* \note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix.
* For badly scaled matrices, this step can be useful to reduce the pivoting during factorization.
* If this is the case for your matrices, you can try the basic scaling method at
* "unsupported/Eigen/src/IterativeSolvers/Scaling.h"
*
* \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<>
* \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLAMD
*
* \implsparsesolverconcept
*
* \sa \ref TutorialSparseSolverConcept
* \sa \ref OrderingMethods_Module
*/
template <typename _MatrixType, typename _OrderingType>
class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::StorageIndex>
{
protected:
typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;
using APIBase::m_isInitialized;
public:
using APIBase::_solve_impl;
typedef _MatrixType MatrixType;
typedef _OrderingType OrderingType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename MatrixType::StorageIndex StorageIndex;
typedef SparseMatrix<Scalar,ColMajor,StorageIndex> NCMatrix;
typedef internal::MappedSuperNodalMatrix<Scalar, StorageIndex> SCMatrix;
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;
typedef internal::SparseLUImpl<Scalar, StorageIndex> Base;
enum {
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
public:
SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
{
initperfvalues();
}
explicit SparseLU(const MatrixType& matrix)
: m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
{
initperfvalues();
compute(matrix);
}
~SparseLU()
{
// Free all explicit dynamic pointers
}
void analyzePattern (const MatrixType& matrix);
void factorize (const MatrixType& matrix);
void simplicialfactorize(const MatrixType& matrix);
/**
* Compute the symbolic and numeric factorization of the input sparse matrix.
* The input matrix should be in column-major storage.
*/
void compute (const MatrixType& matrix)
{
// Analyze
analyzePattern(matrix);
//Factorize
factorize(matrix);
}
inline Index rows() const { return m_mat.rows(); }
inline Index cols() const { return m_mat.cols(); }
/** Indicate that the pattern of the input matrix is symmetric */
void isSymmetric(bool sym)
{
m_symmetricmode = sym;
}
/** \returns an expression of the matrix L, internally stored as supernodes
* The only operation available with this expression is the triangular solve
* \code
* y = b; matrixL().solveInPlace(y);
* \endcode
*/
SparseLUMatrixLReturnType<SCMatrix> matrixL() const
{
return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore);
}
/** \returns an expression of the matrix U.
* The only operation available with this expression is the triangular solve
* \code
* y = b; matrixU().solveInPlace(y);
* \endcode
*/
SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,StorageIndex> > matrixU() const
{
return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,StorageIndex> >(m_Lstore, m_Ustore);
}
/**
* \returns a reference to the row matrix permutation \f$ P_r \f$ such that \f$P_r A P_c^T = L U\f$
* \sa colsPermutation()
*/
inline const PermutationType& rowsPermutation() const
{
return m_perm_r;
}
/**
* \returns a reference to the column matrix permutation \f$ P_c^T \f$ such that \f$P_r A P_c^T = L U\f$
* \sa rowsPermutation()
*/
inline const PermutationType& colsPermutation() const
{
return m_perm_c;
}
/** Set the threshold used for a diagonal entry to be an acceptable pivot. */
void setPivotThreshold(const RealScalar& thresh)
{
m_diagpivotthresh = thresh;
}
#ifdef EIGEN_PARSED_BY_DOXYGEN
/** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
*
* \warning the destination matrix X in X = this->solve(B) must be column-major.
*
* \sa compute()
*/
template<typename Rhs>
inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;
#endif // EIGEN_PARSED_BY_DOXYGEN
/** \brief Reports whether previous computation was successful.
*
* \returns \c Success if the computation was successful,
* \c NumericalIssue if the LU factorization reports a problem (a zero diagonal entry, for instance),
* \c InvalidInput if the input matrix is invalid
*
* \sa iparm()
*/
ComputationInfo info() const
{
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
return m_info;
}
/**
* \returns A string describing the type of error
*/
std::string lastErrorMessage() const
{
return m_lastError;
}
template<typename Rhs, typename Dest>
bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
{
Dest& X(X_base.derived());
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
// Permute the right hand side to form X = Pr*B
// on return, X is overwritten by the computed solution
X.resize(B.rows(),B.cols());
// this ugly const_cast_derived() helps to detect aliasing when applying the permutations
for(Index j = 0; j < B.cols(); ++j)
X.col(j) = rowsPermutation() * B.const_cast_derived().col(j);
//Forward substitution with L
this->matrixL().solveInPlace(X);
this->matrixU().solveInPlace(X);
// Permute back the solution
for (Index j = 0; j < B.cols(); ++j)
X.col(j) = colsPermutation().inverse() * X.col(j);
return true;
}
/**
* \returns the absolute value of the determinant of the matrix of which
* *this is the LU decomposition.
*
* \warning a determinant can be very big or small, so for matrices
* of large enough dimension, there is a risk of overflow/underflow.
* One way to work around that is to use logAbsDeterminant() instead.
*
* \sa logAbsDeterminant(), signDeterminant()
*/
Scalar absDeterminant()
{
using std::abs;
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
// Initialize with the determinant of the row matrix
Scalar det = Scalar(1.);
// Note that the diagonal blocks of U are stored in supernodes,
// which are available in the L part :)
for (Index j = 0; j < this->cols(); ++j)
{
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
{
if(it.index() == j)
{
det *= abs(it.value());
break;
}
}
}
return det;
}
/** \returns the natural log of the absolute value of the determinant of the matrix
* of which *this is the LU decomposition.
*
* \note This method is useful to work around the risk of overflow/underflow that's
* inherent to the determinant computation.
*
* \sa absDeterminant(), signDeterminant()
*/
Scalar logAbsDeterminant() const
{
using std::log;
using std::abs;
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
Scalar det = Scalar(0.);
for (Index j = 0; j < this->cols(); ++j)
{
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
{
if(it.row() < j) continue;
if(it.row() == j)
{
det += log(abs(it.value()));
break;
}
}
}
return det;
}
/** \returns A number representing the sign of the determinant
*
* \sa absDeterminant(), logAbsDeterminant()
*/
Scalar signDeterminant()
{
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
// Initialize with the determinant of the row matrix
Index det = 1;
// Note that the diagonal blocks of U are stored in supernodes,
// which are available in the L part :)
for (Index j = 0; j < this->cols(); ++j)
{
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
{
if(it.index() == j)
{
if(it.value()<0)
det = -det;
else if(it.value()==0)
return 0;
break;
}
}
}
return det * m_detPermR * m_detPermC;
}
/** \returns The determinant of the matrix.
*
* \sa absDeterminant(), logAbsDeterminant()
*/
Scalar determinant()
{
eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
// Initialize with the determinant of the row matrix
Scalar det = Scalar(1.);
// Note that the diagonal blocks of U are stored in supernodes,
// which are available in the L part :)
for (Index j = 0; j < this->cols(); ++j)
{
for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
{
if(it.index() == j)
{
det *= it.value();
break;
}
}
}
return (m_detPermR * m_detPermC) > 0 ? det : -det;
}
protected:
// Functions
void initperfvalues()
{
m_perfv.panel_size = 16;
m_perfv.relax = 1;
m_perfv.maxsuper = 128;
m_perfv.rowblk = 16;
m_perfv.colblk = 8;
m_perfv.fillfactor = 20;
}
// Variables
mutable ComputationInfo m_info;
bool m_factorizationIsOk;
bool m_analysisIsOk;
std::string m_lastError;
NCMatrix m_mat; // The input (permuted) matrix
SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
MappedSparseMatrix<Scalar,ColMajor,StorageIndex> m_Ustore; // The upper triangular matrix
PermutationType m_perm_c; // Column permutation
PermutationType m_perm_r; // Row permutation
IndexVector m_etree; // Column elimination tree
typename Base::GlobalLU_t m_glu;
// SparseLU options
bool m_symmetricmode;
// values for performance
internal::perfvalues m_perfv;
RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot
Index m_nnzL, m_nnzU; // Nonzeros in L and U factors
Index m_detPermR, m_detPermC; // Determinants of the permutation matrices
private:
// Disable copy constructor
SparseLU (const SparseLU& );
}; // End class SparseLU
// Functions needed by the analysis phase
/**
* Compute the column permutation to minimize the fill-in
*
* - Apply this permutation to the input matrix -
*
* - Compute the column elimination tree on the permuted matrix
*
* - Postorder the elimination tree and the column permutation
*
*/
template <typename MatrixType, typename OrderingType>
void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
{
//TODO It is possible, as in SuperLU, to compute row and column scaling vectors to equilibrate the matrix mat.
// Firstly, copy the whole input matrix.
m_mat = mat;
// Compute fill-in ordering
OrderingType ord;
ord(m_mat,m_perm_c);
// Apply the permutation to the column of the input matrix
if (m_perm_c.size())
{
m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.
// Then, permute only the column pointers
ei_declare_aligned_stack_constructed_variable(StorageIndex,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<StorageIndex*>(mat.outerIndexPtr()):0);
// If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed.
if(!mat.isCompressed())
IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1);
// Apply the permutation and compute the nnz per column.
for (Index i = 0; i < mat.cols(); i++)
{
m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
}
}
// Compute the column elimination tree of the permuted matrix
IndexVector firstRowElt;
internal::coletree(m_mat, m_etree,firstRowElt);
// In symmetric mode, do not do postorder here
if (!m_symmetricmode) {
IndexVector post, iwork;
// Post order etree
internal::treePostorder(StorageIndex(m_mat.cols()), m_etree, post);
// Renumber etree in postorder
Index m = m_mat.cols();
iwork.resize(m+1);
for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
m_etree = iwork;
// Postmultiply A*Pc by post, i.e. reorder the matrix according to the postorder of the etree
PermutationType post_perm(m);
for (Index i = 0; i < m; i++)
post_perm.indices()(i) = post(i);
// Combine the two permutations: postorder the permutation for future use
if(m_perm_c.size()) {
m_perm_c = post_perm * m_perm_c;
}
} // end postordering
m_analysisIsOk = true;
}
// Functions needed by the numerical factorization phase
/**
* - Numerical factorization
* - Interleaved with the symbolic factorization
* On exit, info is
*
* = 0: successful factorization
*
* > 0: if info = i, and i is
*
* <= A->ncol: U(i,i) is exactly zero. The factorization has
* been completed, but the factor U is exactly singular,
* and division by zero will occur if it is used to solve a
* system of equations.
*
* > A->ncol: number of bytes allocated when memory allocation
* failure occurred, plus A->ncol. If lwork = -1, it is
* the estimated amount of space needed, plus A->ncol.
*/
template <typename MatrixType, typename OrderingType>
void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
{
using internal::emptyIdxLU;
eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
eigen_assert((matrix.rows() == matrix.cols()) && "Only for square matrices");
typedef typename IndexVector::Scalar StorageIndex;
m_isInitialized = true;
// Apply the column permutation computed in analyzepattern()
// m_mat = matrix * m_perm_c.inverse();
m_mat = matrix;
if (m_perm_c.size())
{
m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
//Then, permute only the column pointers
const StorageIndex * outerIndexPtr;
if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr();
else
{
StorageIndex* outerIndexPtr_t = new StorageIndex[matrix.cols()+1];
for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];
outerIndexPtr = outerIndexPtr_t;
}
for (Index i = 0; i < matrix.cols(); i++)
{
m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
}
if(!matrix.isCompressed()) delete[] outerIndexPtr;
}
else
{ //FIXME This should not be needed if the empty permutation is handled transparently
m_perm_c.resize(matrix.cols());
for(StorageIndex i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
}
Index m = m_mat.rows();
Index n = m_mat.cols();
Index nnz = m_mat.nonZeros();
Index maxpanel = m_perfv.panel_size * m;
// Allocate working storage common to the factor routines
Index lwork = 0;
Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu);
if (info)
{
m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
m_factorizationIsOk = false;
return ;
}
// Set up pointers for integer working arrays
IndexVector segrep(m); segrep.setZero();
IndexVector parent(m); parent.setZero();
IndexVector xplore(m); xplore.setZero();
IndexVector repfnz(maxpanel);
IndexVector panel_lsub(maxpanel);
IndexVector xprune(n); xprune.setZero();
IndexVector marker(m*internal::LUNoMarker); marker.setZero();
repfnz.setConstant(-1);
panel_lsub.setConstant(-1);
// Set up pointers for scalar working arrays
ScalarVector dense;
dense.setZero(maxpanel);
ScalarVector tempv;
tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
// Compute the inverse of perm_c
PermutationType iperm_c(m_perm_c.inverse());
// Identify initial relaxed snodes
IndexVector relax_end(n);
if ( m_symmetricmode == true )
Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
else
Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
m_perm_r.resize(m);
m_perm_r.indices().setConstant(-1);
marker.setConstant(-1);
m_detPermR = 1; // Record the determinant of the row permutation
m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0);
m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0);
// Work on one 'panel' at a time. A panel is one of the following:
// (a) a relaxed supernode at the bottom of the etree, or
// (b) panel_size contiguous columns, where panel_size is defined by the user
Index jcol;
IndexVector panel_histo(n);
Index pivrow; // Pivotal row number in the original row matrix
Index nseg1; // Number of segments in U-column above panel row jcol
Index nseg; // Number of segments in each U-column
Index irep;
Index i, k, jj;
for (jcol = 0; jcol < n; )
{
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
Index panel_size = m_perfv.panel_size; // upper bound on panel width
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
{
if (relax_end(k) != emptyIdxLU)
{
panel_size = k - jcol;
break;
}
}
if (k == n)
panel_size = n - jcol;
// Symbolic outer factorization on a panel of columns
Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
// Numeric sup-panel updates in topological order
Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu);
// Sparse LU within the panel, and below the panel diagonal
for ( jj = jcol; jj< jcol + panel_size; jj++)
{
k = (jj - jcol) * m; // Column index for w-wide arrays
nseg = nseg1; // begin after all the panel segments
//Depth-first-search for the current column
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
if ( info )
{
m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Numeric updates to this column
VectorBlock<ScalarVector> dense_k(dense, k, m);
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
if ( info )
{
m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Copy the U-segments to ucol(*)
info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
if ( info )
{
m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() ";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Form the L-segment
info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
if ( info )
{
m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT ";
std::ostringstream returnInfo;
returnInfo << info;
m_lastError += returnInfo.str();
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Update the determinant of the row permutation matrix
// FIXME: the following test is not correct, we should probably take iperm_c into account and pivrow is not directly the row pivot.
if (pivrow != jj) m_detPermR = -m_detPermR;
// Prune columns (0:jj-1) using column jj
Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
// Reset repfnz for this column
for (i = 0; i < nseg; i++)
{
irep = segrep(i);
repfnz_k(irep) = emptyIdxLU;
}
} // end SparseLU within the panel
jcol += panel_size; // Move to the next panel
} // end for -- end elimination
m_detPermR = m_perm_r.determinant();
m_detPermC = m_perm_c.determinant();
// Count the number of nonzeros in factors
Base::countnz(n, m_nnzL, m_nnzU, m_glu);
// Apply permutation to the L subscripts
Base::fixupL(n, m_perm_r.indices(), m_glu);
// Create supernode matrix L
m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup);
// Create the column major upper sparse matrix U;
new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, StorageIndex> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
m_info = Success;
m_factorizationIsOk = true;
}
template<typename MappedSupernodalType>
struct SparseLUMatrixLReturnType : internal::no_assignment_operator
{
typedef typename MappedSupernodalType::Scalar Scalar;
explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
{ }
Index rows() { return m_mapL.rows(); }
Index cols() { return m_mapL.cols(); }
template<typename Dest>
void solveInPlace( MatrixBase<Dest> &X) const
{
m_mapL.solveInPlace(X);
}
const MappedSupernodalType& m_mapL;
};
template<typename MatrixLType, typename MatrixUType>
struct SparseLUMatrixUReturnType : internal::no_assignment_operator
{
typedef typename MatrixLType::Scalar Scalar;
SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
: m_mapL(mapL),m_mapU(mapU)
{ }
Index rows() { return m_mapL.rows(); }
Index cols() { return m_mapL.cols(); }
template<typename Dest> void solveInPlace(MatrixBase<Dest> &X) const
{
Index nrhs = X.cols();
Index n = X.rows();
// Backward solve with U
for (Index k = m_mapL.nsuper(); k >= 0; k--)
{
Index fsupc = m_mapL.supToCol()[k];
Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension
Index nsupc = m_mapL.supToCol()[k+1] - fsupc;
Index luptr = m_mapL.colIndexPtr()[fsupc];
if (nsupc == 1)
{
for (Index j = 0; j < nrhs; j++)
{
X(fsupc, j) /= m_mapL.valuePtr()[luptr];
}
}
else
{
Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
U = A.template triangularView<Upper>().solve(U);
}
for (Index j = 0; j < nrhs; ++j)
{
for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)
{
typename MatrixUType::InnerIterator it(m_mapU, jcol);
for ( ; it; ++it)
{
Index irow = it.index();
X(irow, j) -= X(jcol, j) * it.value();
}
}
}
} // End For U-solve
}
const MatrixLType& m_mapL;
const MatrixUType& m_mapU;
};
} // End namespace Eigen
#endif
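The class documentation above sketches the analyzePattern/factorize/solve workflow. Here is a minimal, self-contained usage sketch of that workflow; the matrix values, sizes, and error handling are illustrative assumptions, not part of this commit:

#include <iostream>
#include <vector>
#include <Eigen/Sparse>

int main()
{
  const int n = 4;
  // SparseLU expects a compressed, column-major sparse matrix:
  // assemble from triplets and call makeCompressed() before factorizing.
  std::vector<Eigen::Triplet<double> > coeffs = {
    {0, 0, 4.0}, {1, 1, 3.0}, {2, 2, 2.0}, {3, 3, 5.0},
    {0, 2, 1.0}, {3, 0, 1.0}, {1, 3, 2.0}
  };
  Eigen::SparseMatrix<double> A(n, n);
  A.setFromTriplets(coeffs.begin(), coeffs.end());
  A.makeCompressed();
  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);

  Eigen::SparseLU<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > solver;
  solver.analyzePattern(A); // symbolic step: ordering and elimination tree
  solver.factorize(A);      // numeric step: compute the L and U factors
  if (solver.info() != Eigen::Success)
  {
    std::cerr << "Factorization failed: " << solver.lastErrorMessage() << std::endl;
    return 1;
  }
  Eigen::VectorXd x = solver.solve(b);
  std::cout << "residual norm: " << (A * x - b).norm() << std::endl;
  return 0;
}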


@@ -0,0 +1,66 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef SPARSELU_IMPL_H
#define SPARSELU_IMPL_H
namespace Eigen {
namespace internal {
/** \ingroup SparseLU_Module
* \class SparseLUImpl
* Base class for SparseLU
*/
template <typename Scalar, typename StorageIndex>
class SparseLUImpl
{
public:
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> ScalarMatrix;
typedef Map<ScalarMatrix, 0, OuterStride<> > MappedMatrixBlock;
typedef typename ScalarVector::RealScalar RealScalar;
typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;
typedef Ref<Matrix<StorageIndex,Dynamic,1> > BlockIndexVector;
typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t;
typedef SparseMatrix<Scalar,ColMajor,StorageIndex> MatrixType;
protected:
template <typename VectorType>
Index expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions);
Index memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu);
template <typename VectorType>
Index memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions);
void heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
void relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end);
Index snode_dfs(const Index jcol, const Index kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu);
Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu);
Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu);
template <typename Traits>
void dfs_kernel(const StorageIndex jj, IndexVector& perm_r,
Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits);
void panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu);
Index column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu);
Index column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu);
Index copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu);
void pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu);
void countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu);
void fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu);
template<typename , typename >
friend struct column_dfs_traits;
};
} // end namespace internal
} // namespace Eigen
#endif


@@ -0,0 +1,226 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU
* -- SuperLU routine (version 3.1) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* August 1, 2008
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef EIGEN_SPARSELU_MEMORY
#define EIGEN_SPARSELU_MEMORY
namespace Eigen {
namespace internal {
enum { LUNoMarker = 3 };
enum {emptyIdxLU = -1};
inline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b)
{
return (std::max)(m, (t+b)*w);
}
template< typename Scalar>
inline Index LUTempSpace(Index&m, Index& w)
{
return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar);
}
/**
* Expand the existing storage to accommodate more fill-ins
* \param vec Valid pointer to the vector to allocate or expand
* \param[in,out] length On input, the current length of the vector to be increased. On output, the length of the newly allocated vector
* \param[in] nbElts Current number of elements in the factors
* \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand
* \param[in,out] num_expansions Number of times the memory has been expanded
*/
template <typename Scalar, typename StorageIndex>
template <typename VectorType>
Index SparseLUImpl<Scalar,StorageIndex>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions)
{
float alpha = 1.5; // Ratio of the memory increase
Index new_len; // New size of the allocated memory
if(num_expansions == 0 || keep_prev)
new_len = length; // First allocation or keep_prev: use the requested length
else
new_len = (std::max)(length+1,Index(alpha * length));
VectorType old_vec; // Temporary vector to hold the previous values
if (nbElts > 0 )
old_vec = vec.segment(0,nbElts);
//Allocate or expand the current vector
#ifdef EIGEN_EXCEPTIONS
try
#endif
{
vec.resize(new_len);
}
#ifdef EIGEN_EXCEPTIONS
catch(std::bad_alloc& )
#else
if(!vec.size())
#endif
{
if (!num_expansions)
{
// First-time allocation from LUMemInit();
// let LUMemInit() deal with it.
return -1;
}
if (keep_prev)
{
// In this case, the memory length should not be reduced
return new_len;
}
else
{
// Reduce the size and increase again
Index tries = 0; // Number of attempts
do
{
alpha = (alpha + 1)/2;
new_len = (std::max)(length+1,Index(alpha * length));
#ifdef EIGEN_EXCEPTIONS
try
#endif
{
vec.resize(new_len);
}
#ifdef EIGEN_EXCEPTIONS
catch(std::bad_alloc& )
#else
if (!vec.size())
#endif
{
tries += 1;
if ( tries > 10) return new_len;
}
} while (!vec.size());
}
}
//Copy the previous values to the newly allocated space
if (nbElts > 0)
vec.segment(0, nbElts) = old_vec;
length = new_len;
if(num_expansions) ++num_expansions;
return 0;
}
/**
* \brief Allocate various working space for the numerical factorization phase.
* \param m number of rows of the input matrix
* \param n number of columns
* \param annz number of initial nonzeros in the matrix
* \param lwork if lwork=-1, this routine returns an estimated size of the required memory
* \param glu persistent data to facilitate multiple factors : will be deleted later ??
* \param fillratio estimated ratio of fill in the factors
* \param panel_size Size of a panel
* \return an estimated size of the required memory if lwork = -1; otherwise, the size of the memory actually allocated when allocation failed, and 0 on success
* \note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation
*/
template <typename Scalar, typename StorageIndex>
Index SparseLUImpl<Scalar,StorageIndex>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu)
{
Index& num_expansions = glu.num_expansions; //No memory expansions so far
num_expansions = 0;
glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U
glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated nnz in L factor
// Return the estimated size to the user if necessary
Index tempSpace;
tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
if (lwork == emptyIdxLU)
{
Index estimated_size;
estimated_size = (5 * n + 5) * sizeof(Index) + tempSpace
+ (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n;
return estimated_size;
}
// Setup the required space
// First allocate Integer pointers for L\U factors
glu.xsup.resize(n+1);
glu.supno.resize(n+1);
glu.xlsub.resize(n+1);
glu.xlusup.resize(n+1);
glu.xusub.resize(n+1);
// Reserve memory for L/U factors
do
{
if( (expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions)<0)
|| (expand<ScalarVector>(glu.ucol, glu.nzumax, 0, 0, num_expansions)<0)
|| (expand<IndexVector> (glu.lsub, glu.nzlmax, 0, 0, num_expansions)<0)
|| (expand<IndexVector> (glu.usub, glu.nzumax, 0, 1, num_expansions)<0) )
{
//Reduce the estimated size and retry
glu.nzlumax /= 2;
glu.nzumax /= 2;
glu.nzlmax /= 2;
if (glu.nzlumax < annz ) return glu.nzlumax;
}
} while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size());
++num_expansions;
return 0;
} // end LuMemInit
/**
* \brief Expand the existing storage
* \param vec vector to expand
* \param[in,out] maxlen On input, previous size of vec (number of elements to copy). On output, new size
* \param nbElts current number of elements in the vector.
* \param memtype Type of the element to expand
* \param num_expansions Number of expansions
* \return 0 on success, > 0 size of the memory allocated so far
*/
template <typename Scalar, typename StorageIndex>
template <typename VectorType>
Index SparseLUImpl<Scalar,StorageIndex>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions)
{
Index failed_size;
if (memtype == USUB)
failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);
else
failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);
if (failed_size)
return failed_size;
return 0 ;
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SPARSELU_MEMORY
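expand() above grows a factor array by a ratio alpha = 1.5 and, if the larger allocation fails, halves alpha toward 1 and retries up to ten times. A standalone sketch of that grow-then-back-off policy on a std::vector (growWithBackoff is a hypothetical helper for illustration only; the real routine works in place on the vectors held in GlobalLU_t):

#include <algorithm>
#include <new>
#include <vector>

// Hypothetical illustration of expand()'s reallocation policy:
// try length*alpha; on std::bad_alloc, move alpha toward 1 and retry.
template <typename T>
long growWithBackoff(std::vector<T>& vec, long length)
{
  float alpha = 1.5f; // initial growth ratio, as in expand()
  for (int tries = 0; tries <= 10; ++tries)
  {
    long new_len = std::max(length + 1, long(alpha * length));
    try
    {
      vec.resize(new_len);
      return new_len; // success: report the new capacity
    }
    catch (std::bad_alloc&)
    {
      alpha = (alpha + 1) / 2; // shrink the growth factor toward 1
    }
  }
  return -1; // too many failed attempts, give up
}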


@@ -0,0 +1,110 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h
* -- SuperLU routine (version 4.1) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November, 2010
*
* Global data structures used in LU factorization -
*
* nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
* (xsup,supno): supno[i] is the supernode no to which i belongs;
* xsup(s) points to the beginning of the s-th supernode.
* e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
* xsup 0 1 2 4 7 12
* Note: dfs will be performed on supernode rep. relative to the new
* row pivoting ordering
*
* (xlsub,lsub): lsub[*] contains the compressed subscript of
* rectangular supernodes; xlsub[j] points to the starting
* location of the j-th column in lsub[*]. Note that xlsub
* is indexed by column.
* Storage: original row subscripts
*
* During the course of sparse LU factorization, we also use
* (xlsub,lsub) for the purpose of symmetric pruning. For each
* supernode {s,s+1,...,t=s+r} with first column s and last
* column t, the subscript set
* lsub[j], j=xlsub[s], .., xlsub[s+1]-1
* is the structure of column s (i.e. structure of this supernode).
* It is used for the storage of numerical values.
* Furthermore,
* lsub[j], j=xlsub[t], .., xlsub[t+1]-1
* is the structure of the last column t of this supernode.
* It is for the purpose of symmetric pruning. Therefore, the
* structural subscripts can be rearranged without making physical
* interchanges among the numerical values.
*
* However, if the supernode has only one column, then we
* only keep one set of subscripts. For any subscript interchange
* performed, similar interchange must be done on the numerical
* values.
*
* The last column structures (for pruning) will be removed
* after the numerical LU factorization phase.
*
* (xlusup,lusup): lusup[*] contains the numerical values of the
* rectangular supernodes; xlusup[j] points to the starting
* location of the j-th column in storage vector lusup[*]
* Note: xlusup is indexed by column.
* Each rectangular supernode is stored by column-major
* scheme, consistent with Fortran 2-dim array storage.
*
* (xusub,ucol,usub): ucol[*] stores the numerical values of
* U-columns outside the rectangular supernodes. The row
* subscript of nonzero ucol[k] is stored in usub[k].
* xusub[i] points to the starting location of column i in ucol.
* Storage: new row subscripts; that is subscripts of PA.
*/
#ifndef EIGEN_LU_STRUCTS
#define EIGEN_LU_STRUCTS
namespace Eigen {
namespace internal {
typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType;
template <typename IndexVector, typename ScalarVector>
struct LU_GlobalLU_t {
typedef typename IndexVector::Scalar StorageIndex;
IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode
IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping)
ScalarVector lusup; // nonzero values of L ordered by columns
IndexVector lsub; // Compressed row indices of L rectangular supernodes.
IndexVector xlusup; // pointers to the beginning of each column in lusup
IndexVector xlsub; // pointers to the beginning of each column in lsub
Index nzlmax; // Current max size of lsub
Index nzlumax; // Current max size of lusup
ScalarVector ucol; // nonzero values of U ordered by columns
IndexVector usub; // row indices of U columns in ucol
IndexVector xusub; // Pointers to the beginning of each column of U in ucol
Index nzumax; // Current max size of ucol
Index n; // Number of columns in the matrix
Index num_expansions;
};
// Values to set for performance
struct perfvalues {
Index panel_size; // a panel consists of at most <panel_size> consecutive columns
Index relax; // To control degree of relaxing supernodes. If the number of nodes (columns)
// in a subtree of the elimination tree is less than relax, this subtree is considered
// as one supernode regardless of the row structures of those columns
Index maxsuper; // The maximum size for a supernode in complete LU
Index rowblk; // The minimum row dimension for 2-D blocking to be used;
Index colblk; // The minimum column dimension for 2-D blocking to be used;
Index fillfactor; // The estimated fills factors for L and U, compared with A
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_LU_STRUCTS
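The (xsup, supno) pair documented above is a two-way mapping: supno[c] gives the supernode of column c, and supernode s spans columns [xsup[s], xsup[s+1]). A small sketch that walks the example data from the header comment (toy arrays, not produced by a real factorization):

#include <cstdio>

int main()
{
  // Data from the comment above: n = 12 columns grouped into 5 supernodes.
  const int nsuper = 4;
  const int supno[12] = {0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4};
  const int xsup[nsuper + 2] = {0, 1, 2, 4, 7, 12};
  for (int s = 0; s <= nsuper; ++s)
  {
    std::printf("supernode %d spans columns [%d, %d)\n", s, xsup[s], xsup[s + 1]);
    for (int c = xsup[s]; c < xsup[s + 1]; ++c)
      if (supno[c] != s) // every column in the range maps back to supernode s
        std::printf("  inconsistent mapping at column %d\n", c);
  }
  return 0;
}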


@@ -0,0 +1,301 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
#define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H
namespace Eigen {
namespace internal {
/** \ingroup SparseLU_Module
* \brief a class to manipulate the L supernodal factor from the SparseLU factorization
*
* This class contains the data needed to store
* and manipulate the supernodes during the factorization and solution phases of sparse LU.
* Only the lower triangular matrix has supernodes.
*
* NOTE: This class corresponds to the SCformat structure in SuperLU
*
*/
/* TODO
* InnerIterator as for sparsematrix
* SuperInnerIterator to iterate through all supernodes
* Function for triangular solve
*/
template <typename _Scalar, typename _StorageIndex>
class MappedSuperNodalMatrix
{
public:
typedef _Scalar Scalar;
typedef _StorageIndex StorageIndex;
typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
public:
MappedSuperNodalMatrix()
{
}
MappedSuperNodalMatrix(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
{
setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col);
}
~MappedSuperNodalMatrix()
{
}
/**
* Set appropriate pointers for the lower triangular supernodal matrix
* This information is available at the end of the numerical factorization.
* FIXME This class will be modified so that it can be used in the course
* of the factorization.
*/
void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
{
m_row = m;
m_col = n;
m_nzval = nzval.data();
m_nzval_colptr = nzval_colptr.data();
m_rowind = rowind.data();
m_rowind_colptr = rowind_colptr.data();
m_nsuper = col_to_sup(n);
m_col_to_sup = col_to_sup.data();
m_sup_to_col = sup_to_col.data();
}
/**
* Number of rows
*/
Index rows() { return m_row; }
/**
* Number of columns
*/
Index cols() { return m_col; }
/**
* Return the array of nonzero values packed by column
*
* The size is nnz
*/
Scalar* valuePtr() { return m_nzval; }
const Scalar* valuePtr() const
{
return m_nzval;
}
/**
* Return the pointers to the beginning of each column in \ref valuePtr()
*/
StorageIndex* colIndexPtr()
{
return m_nzval_colptr;
}
const StorageIndex* colIndexPtr() const
{
return m_nzval_colptr;
}
/**
* Return the array of compressed row indices of all supernodes
*/
StorageIndex* rowIndex() { return m_rowind; }
const StorageIndex* rowIndex() const
{
return m_rowind;
}
/**
* Return the location in \em rowIndex() which starts each column
*/
StorageIndex* rowIndexPtr() { return m_rowind_colptr; }
const StorageIndex* rowIndexPtr() const
{
return m_rowind_colptr;
}
/**
* Return the array of column-to-supernode mapping
*/
StorageIndex* colToSup() { return m_col_to_sup; }
const StorageIndex* colToSup() const
{
return m_col_to_sup;
}
/**
* Return the array of supernode-to-column mapping
*/
StorageIndex* supToCol() { return m_sup_to_col; }
const StorageIndex* supToCol() const
{
return m_sup_to_col;
}
/**
* Return the number of supernodes
*/
Index nsuper() const
{
return m_nsuper;
}
class InnerIterator;
template<typename Dest>
void solveInPlace( MatrixBase<Dest>&X) const;
protected:
Index m_row; // Number of rows
Index m_col; // Number of columns
Index m_nsuper; // Number of supernodes
Scalar* m_nzval; //array of nonzero values packed by column
StorageIndex* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j
StorageIndex* m_rowind; // Array of compressed row indices of rectangular supernodes
StorageIndex* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j
StorageIndex* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs
StorageIndex* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode
private :
};
/**
* \brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L
*
*/
template<typename Scalar, typename StorageIndex>
class MappedSuperNodalMatrix<Scalar,StorageIndex>::InnerIterator
{
public:
InnerIterator(const MappedSuperNodalMatrix& mat, Index outer)
: m_matrix(mat),
m_outer(outer),
m_supno(mat.colToSup()[outer]),
m_idval(mat.colIndexPtr()[outer]),
m_startidval(m_idval),
m_endidval(mat.colIndexPtr()[outer+1]),
m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]),
m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1])
{}
inline InnerIterator& operator++()
{
m_idval++;
m_idrow++;
return *this;
}
inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; }
inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]); }
inline Index index() const { return m_matrix.rowIndex()[m_idrow]; }
inline Index row() const { return index(); }
inline Index col() const { return m_outer; }
inline Index supIndex() const { return m_supno; }
inline operator bool() const
{
return ( (m_idval < m_endidval) && (m_idval >= m_startidval)
&& (m_idrow < m_endidrow) );
}
protected:
const MappedSuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix
const Index m_outer; // Current column
const Index m_supno; // Current SuperNode number
Index m_idval; // Index to browse the values in the current column
const Index m_startidval; // Start of the column value
const Index m_endidval; // End of the column value
Index m_idrow; // Index to browse the row indices
Index m_endidrow; // End index of row indices of the current column
};
/**
* \brief Solve with the supernode triangular matrix
*
*/
template<typename Scalar, typename Index_>
template<typename Dest>
void MappedSuperNodalMatrix<Scalar,Index_>::solveInPlace( MatrixBase<Dest>&X) const
{
/* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than Index */
// eigen_assert(X.rows() <= NumTraits<Index>::highest());
// eigen_assert(X.cols() <= NumTraits<Index>::highest());
Index n = int(X.rows());
Index nrhs = Index(X.cols());
const Scalar * Lval = valuePtr(); // Nonzero values
Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor> work(n, nrhs); // working vector
work.setZero();
for (Index k = 0; k <= nsuper(); k ++)
{
Index fsupc = supToCol()[k]; // First column of the current supernode
Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column
Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode
Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode
Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode
Index irow; //Current index row
if (nsupc == 1 )
{
for (Index j = 0; j < nrhs; j++)
{
InnerIterator it(*this, fsupc);
++it; // Skip the diagonal element
for (; it; ++it)
{
irow = it.row();
X(irow, j) -= X(fsupc, j) * it.value();
}
}
}
else
{
// The supernode has more than one column
Index luptr = colIndexPtr()[fsupc];
Index lda = colIndexPtr()[fsupc+1] - luptr;
// Triangular solve
Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) );
Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
U = A.template triangularView<UnitLower>().solve(U);
// Matrix-vector product
new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
work.topRows(nrow).noalias() = A * U;
//Begin Scatter
for (Index j = 0; j < nrhs; j++)
{
Index iptr = istart + nsupc;
for (Index i = 0; i < nrow; i++)
{
irow = rowIndex()[iptr];
X(irow, j) -= work(i, j); // Scatter operation
work(i, j) = Scalar(0);
iptr++;
}
}
}
}
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SPARSELU_MATRIX_H
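solveInPlace() above views each supernode's values, stored contiguously in a flat array, as a dense matrix via Map with an OuterStride, and then reuses Eigen's dense triangular solver. A minimal sketch of that mapping trick in isolation (toy buffer; the real code maps into glu.lusup at the supernode's offset):

#include <iostream>
#include <Eigen/Dense>

int main()
{
  using namespace Eigen;
  // A flat, column-major buffer standing in for a slice of the supernodal
  // value array: a 4x2 block stored with leading dimension lda = 4.
  double buf[8] = {1, 2, 3, 4,  // column 0
                   0, 5, 6, 7}; // column 1
  const int lda = 4, nsupc = 2;
  // View the top nsupc x nsupc square of the block as a dense matrix,
  // striding by lda between columns, as solveInPlace() does above.
  Map<const Matrix<double, Dynamic, Dynamic, ColMajor>, 0, OuterStride<> >
      A(buf, nsupc, nsupc, OuterStride<>(lda));
  Vector2d rhs(1.0, 2.0);
  // Unit-lower triangular solve on the mapped diagonal block.
  Vector2d x = A.triangularView<UnitLower>().solve(rhs);
  std::cout << x.transpose() << std::endl; // prints "1 0"
  return 0;
}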


@@ -0,0 +1,80 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_SPARSELU_UTILS_H
#define EIGEN_SPARSELU_UTILS_H
namespace Eigen {
namespace internal {
/**
* \brief Count nonzero elements in the factors
*/
template <typename Scalar, typename StorageIndex>
void SparseLUImpl<Scalar,StorageIndex>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu)
{
nnzL = 0;
nnzU = (glu.xusub)(n);
Index nsuper = (glu.supno)(n);
Index jlen;
Index i, j, fsupc;
if (n <= 0 ) return;
// For each supernode
for (i = 0; i <= nsuper; i++)
{
fsupc = glu.xsup(i);
jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
for (j = fsupc; j < glu.xsup(i+1); j++)
{
nnzL += jlen;
nnzU += j - fsupc + 1;
jlen--;
}
}
}
/**
* \brief Fix up the data storage lsub for L-subscripts.
*
* It removes the subscript sets used for structural pruning
* and applies the row permutation to the remaining subscripts
*
*/
template <typename Scalar, typename StorageIndex>
void SparseLUImpl<Scalar,StorageIndex>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu)
{
Index fsupc, i, j, k, jstart;
StorageIndex nextl = 0;
Index nsuper = (glu.supno)(n);
// For each supernode
for (i = 0; i <= nsuper; i++)
{
fsupc = glu.xsup(i);
jstart = glu.xlsub(fsupc);
glu.xlsub(fsupc) = nextl;
for (j = jstart; j < glu.xlsub(fsupc + 1); j++)
{
glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A
nextl++;
}
for (k = fsupc+1; k < glu.xsup(i+1); k++)
glu.xlsub(k) = nextl; // other columns in supernode i
}
glu.xlsub(n) = nextl;
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SPARSELU_UTILS_H
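countnz() above relies on the supernode shape: within a supernode, each successive column of L stores one fewer row subscript (jlen decreases), while the dense triangular part of U inside the supernode grows by one entry per column. A toy recomputation of the supernodal part of those counts (made-up xsup/xlsub arrays; the real routine also seeds nnzU with the off-supernode entries from xusub(n)):

#include <cstdio>

int main()
{
  // One supernode covering columns [0, 3) whose first column stores 5 rows.
  const int nsuper = 0;
  const int xsup[] = {0, 3};
  const int xlsub[] = {0, 5};
  long nnzL = 0, nnzU = 0;
  for (int i = 0; i <= nsuper; ++i)
  {
    int fsupc = xsup[i];
    int jlen = xlsub[fsupc + 1] - xlsub[fsupc];
    for (int j = fsupc; j < xsup[i + 1]; ++j)
    {
      nnzL += jlen;          // rows stored for column j of L
      nnzU += j - fsupc + 1; // dense triangle of U inside the supernode
      jlen--;                // the next column starts one row lower
    }
  }
  std::printf("nnzL = %ld, nnzU = %ld\n", nnzL, nnzU); // 12 and 6
  return 0;
}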


@@ -0,0 +1,181 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_COLUMN_BMOD_H
#define SPARSELU_COLUMN_BMOD_H
namespace Eigen {
namespace internal {
/**
* \brief Performs numeric block updates (sup-col) in topological order
*
* \param jcol current column to update
* \param nseg Number of segments in the U part
* \param dense Store the full representation of the column
* \param tempv working array
* \param segrep segment representative ...
* \param repfnz ??? First nonzero column in each row ??? ...
* \param fpanelc First column in the current panel
* \param glu Global LU data.
* \return 0 - successful return
* > 0 - number of bytes allocated when the routine ran out of space
*
*/
template <typename Scalar, typename StorageIndex>
Index SparseLUImpl<Scalar,StorageIndex>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv,
BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu)
{
Index jsupno, k, ksub, krep, ksupno;
Index lptr, nrow, isub, irow, nextlu, new_next, ufirst;
Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros;
/* krep = representative of current k-th supernode
* fsupc = first supernodal column
* nsupc = number of columns in a supernode
* nsupr = number of rows in a supernode
* luptr = location of supernodal LU-block in storage
* kfnz = first nonz in the k-th supernodal segment
* no_zeros = number of leading zeros in a supernodal U-segment
*/
jsupno = glu.supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
k = nseg - 1;
Index d_fsupc; // distance between the first column of the current panel and the
// first column of the current snode
Index fst_col; // First column within small LU update
Index segsize;
for (ksub = 0; ksub < nseg; ksub++)
{
krep = segrep(k); k--;
ksupno = glu.supno(krep);
if (jsupno != ksupno )
{
// outside the rectangular supernode
fsupc = glu.xsup(ksupno);
fst_col = (std::max)(fsupc, fpanelc);
// Distance from the current supernode to the current panel;
// d_fsupc = 0 if fsupc > fpanelc
d_fsupc = fst_col - fsupc;
luptr = glu.xlusup(fst_col) + d_fsupc;
lptr = glu.xlsub(fsupc) + d_fsupc;
kfnz = repfnz(krep);
kfnz = (std::max)(kfnz, fpanelc);
segsize = krep - kfnz + 1;
nsupc = krep - fst_col + 1;
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
nrow = nsupr - d_fsupc - nsupc;
Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);
// Perform a triangular solver and block update,
// then scatter the result of sup-col update to dense
no_zeros = kfnz - fst_col;
if(segsize==1)
LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
else
LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
} // end if jsupno
} // end for each segment
// Process the supernodal portion of L\U[*,j]
nextlu = glu.xlusup(jcol);
fsupc = glu.xsup(jsupno);
// copy the SPA dense into L\U[*,j]
Index mem;
new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc);
Index offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;
if(offset)
new_next += offset;
while (new_next > glu.nzlumax )
{
mem = memXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
if (mem) return mem;
}
for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
{
irow = glu.lsub(isub);
glu.lusup(nextlu) = dense(irow);
dense(irow) = Scalar(0.0);
++nextlu;
}
if(offset)
{
glu.lusup.segment(nextlu,offset).setZero();
nextlu += offset;
}
glu.xlusup(jcol + 1) = StorageIndex(nextlu); // close L\U(*,jcol);
/* For more updates within the panel (also within the current supernode),
* should start from the first column of the panel, or the first column
* of the supernode, whichever is bigger. There are two cases:
* 1) fsupc < fpanelc, then fst_col <-- fpanelc
* 2) fsupc >= fpanelc, then fst_col <-- fsupc
*/
fst_col = (std::max)(fsupc, fpanelc);
if (fst_col < jcol)
{
// Distance between the current supernode and the current panel
// d_fsupc = 0 if fsupc >= fpanelc
d_fsupc = fst_col - fsupc;
lptr = glu.xlsub(fsupc) + d_fsupc;
luptr = glu.xlusup(fst_col) + d_fsupc;
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension
nsupc = jcol - fst_col; // excluding jcol
nrow = nsupr - d_fsupc - nsupc;
// points to the beginning of jcol in snode L\U(jsupno)
ufirst = glu.xlusup(jcol) + d_fsupc;
Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
MappedMatrixBlock A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
u = A.template triangularView<UnitLower>().solve(u);
new (&A) MappedMatrixBlock ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
l.noalias() -= A * u;
} // End if fst_col
return 0;
}
} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_COLUMN_BMOD_H


@@ -0,0 +1,179 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_COLUMN_DFS_H
#define SPARSELU_COLUMN_DFS_H
template <typename Scalar, typename StorageIndex> class SparseLUImpl;
namespace Eigen {
namespace internal {
template<typename IndexVector, typename ScalarVector>
struct column_dfs_traits : no_assignment_operator
{
typedef typename ScalarVector::Scalar Scalar;
typedef typename IndexVector::Scalar StorageIndex;
column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& glu, SparseLUImpl<Scalar, StorageIndex>& luImpl)
: m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl)
{}
bool update_segrep(Index /*krep*/, Index /*jj*/)
{
return true;
}
void mem_expand(IndexVector& lsub, Index& nextl, Index chmark)
{
if (nextl >= m_glu.nzlmax)
m_luImpl.memXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
if (chmark != (m_jcol-1)) m_jsuper_ref = emptyIdxLU;
}
enum { ExpandMem = true };
Index m_jcol;
Index& m_jsuper_ref;
typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& m_glu;
SparseLUImpl<Scalar, StorageIndex>& m_luImpl;
};
/**
* \brief Performs a symbolic factorization on column jcol and decides the supernode boundary
*
* A supernode representative is the last column of a supernode.
* The nonzeros in U[*,j] are segments that end at supernodes representatives.
* The routine returns a list of the supernodal representatives
* in topological order of the dfs that generates them.
* The location of the first nonzero in each supernodal segment
* (supernodal entry location) is also returned.
*
* \param m number of rows in the matrix
* \param jcol Current column
* \param perm_r Row permutation
* \param maxsuper Maximum number of columns allowed in a supernode
* \param [in,out] nseg Number of segments in current U[*,j] - new segments appended
* \param lsub_col defines the rhs vector to start the dfs
* \param [in,out] segrep Segment representatives - new segments appended
* \param repfnz First nonzero location in each row
* \param xprune The pruned elimination tree
* \param marker marker[i] == jj, if i was visited during dfs of current column jj;
* \param parent The elimination tree
* \param xplore working array
* \param glu global LU data
* \return 0 success
* > 0 number of bytes allocated when the routine ran out of memory
*
*/
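/* A sketch of the internal call convention, as a factorization driver might
 * use it (the surrounding driver loop is not part of this file):
 * \code
 * Index info = column_dfs(m, jcol, perm_r, maxsuper, nseg, lsub_col,
 *                         segrep, repfnz, xprune, marker, parent, xplore, glu);
 * if (info) return info; // > 0: bytes allocated at the point of failure
 * \endcode
 */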
template <typename Scalar, typename StorageIndex>
Index SparseLUImpl<Scalar,StorageIndex>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg,
BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune,
IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
{
Index jsuper = glu.supno(jcol);
Index nextl = glu.xlsub(jcol);
VectorBlock<IndexVector> marker2(marker, 2*m, m);
column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu, *this);
// For each nonzero in A(*,jcol) do dfs
for (Index k = 0; ((k < m) ? lsub_col[k] != emptyIdxLU : false) ; k++)
{
Index krow = lsub_col(k);
lsub_col(k) = emptyIdxLU;
Index kmark = marker2(krow);
// krow was visited before, go to the next nonz;
if (kmark == jcol) continue;
dfs_kernel(StorageIndex(jcol), perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
xplore, glu, nextl, krow, traits);
} // for each nonzero ...
Index fsupc;
StorageIndex nsuper = glu.supno(jcol);
StorageIndex jcolp1 = StorageIndex(jcol) + 1;
Index jcolm1 = jcol - 1;
// check to see if j belongs in the same supernode as j-1
if ( jcol == 0 )
{ // Do nothing for column 0
nsuper = glu.supno(0) = 0 ;
}
else
{
fsupc = glu.xsup(nsuper);
StorageIndex jptr = glu.xlsub(jcol); // Not yet compressed
StorageIndex jm1ptr = glu.xlsub(jcolm1);
// Use supernodes of type T2 : see SuperLU paper
if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU;
// Make sure the number of columns in a supernode doesn't
// exceed threshold
if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU;
/* If jcol starts a new supernode, reclaim storage space in
* glu.lsub from previous supernode. Note we only store
* the subscript set of the first and last columns of
* a supernode. (first for num values, last for pruning)
*/
if (jsuper == emptyIdxLU)
{ // starts a new supernode
if ( (fsupc < jcolm1-1) )
{ // >= 3 columns in nsuper
StorageIndex ito = glu.xlsub(fsupc+1);
glu.xlsub(jcolm1) = ito;
StorageIndex istop = ito + jptr - jm1ptr;
xprune(jcolm1) = istop; // initialize xprune(jcol-1)
glu.xlsub(jcol) = istop;
for (StorageIndex ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
glu.lsub(ito) = glu.lsub(ifrom);
nextl = ito; // = istop + length(jcol)
}
nsuper++;
glu.supno(jcol) = nsuper;
} // if a new supernode
} // end else: jcol > 0
// Tidy up the pointers before exit
glu.xsup(nsuper+1) = jcolp1;
glu.supno(jcolp1) = nsuper;
xprune(jcol) = StorageIndex(nextl); // Initialize upper bound for pruning
glu.xlsub(jcolp1) = StorageIndex(nextl);
return 0;
}
} // end namespace internal
} // end namespace Eigen
#endif


@@ -0,0 +1,107 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_COPY_TO_UCOL_H
#define SPARSELU_COPY_TO_UCOL_H
namespace Eigen {
namespace internal {
/**
* \brief Copies the nonzero U-segments of column jcol from the dense SPA into the compressed storage of U, in topological order
*
* \param jcol current column to update
* \param nseg Number of segments in the U part
* \param segrep segment representative ...
* \param repfnz First nonzero column in each row ...
* \param perm_r Row permutation
* \param dense Store the full representation of the column
* \param glu Global LU data.
* \return 0 - successful return
* > 0 - number of bytes allocated when the routine ran out of memory
*
*/
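/* After this routine, column jcol of U occupies positions
 * [xusub(jcol), xusub(jcol+1)) of (usub, ucol) -- the usual compressed
 * sparse column invariant. A minimal access sketch (process() is a
 * hypothetical consumer of (row, value) pairs):
 * \code
 * for (Index p = glu.xusub(jcol); p < glu.xusub(jcol+1); ++p)
 *   process(glu.usub(p), glu.ucol(p));
 * \endcode
 */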
template <typename Scalar, typename StorageIndex>
Index SparseLUImpl<Scalar,StorageIndex>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep,
BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
{
Index ksub, krep, ksupno;
Index jsupno = glu.supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
Index k = nseg - 1, i;
StorageIndex nextu = glu.xusub(jcol);
Index kfnz, isub, segsize;
Index new_next,irow;
Index fsupc, mem;
for (ksub = 0; ksub < nseg; ksub++)
{
krep = segrep(k); k--;
ksupno = glu.supno(krep);
if (jsupno != ksupno ) // should go into ucol();
{
kfnz = repfnz(krep);
if (kfnz != emptyIdxLU)
{ // Nonzero U-segment
fsupc = glu.xsup(ksupno);
isub = glu.xlsub(fsupc) + kfnz - fsupc;
segsize = krep - kfnz + 1;
new_next = nextu + segsize;
while (new_next > glu.nzumax)
{
mem = memXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
if (mem) return mem;
mem = memXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
if (mem) return mem;
}
for (i = 0; i < segsize; i++)
{
irow = glu.lsub(isub);
glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order
glu.ucol(nextu) = dense(irow);
dense(irow) = Scalar(0.0);
nextu++;
isub++;
}
} // end nonzero U-segment
} // end if jsupno
} // end for each segment
glu.xusub(jcol + 1) = nextu; // close U(*,jcol)
return 0;
}
} // namespace internal
} // end namespace Eigen
#endif // SPARSELU_COPY_TO_UCOL_H


@@ -0,0 +1,280 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H
#define EIGEN_SPARSELU_GEMM_KERNEL_H
namespace Eigen {
namespace internal {
/** \internal
* A general matrix-matrix product kernel optimized for the SparseLU factorization.
* - A, B, and C must be column major
* - lda and ldc must be multiples of the respective packet size
* - C must have the same alignment as A
*/
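/* For reference, this kernel computes the plain column-major update
 * C(m x n) += A(m x d) * B(d x n); an unoptimized scalar sketch of the
 * same operation:
 * \code
 * for (Index j = 0; j < n; ++j)
 *   for (Index k = 0; k < d; ++k)
 *     for (Index i = 0; i < m; ++i)
 *       C[i + j*ldc] += A[i + k*lda] * B[k + j*ldb];
 * \endcode
 */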
template<typename Scalar>
EIGEN_DONT_INLINE
void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
{
using namespace Eigen::internal;
typedef typename packet_traits<Scalar>::type Packet;
enum {
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
PacketSize = packet_traits<Scalar>::size,
PM = 8, // peeling in M
RN = 2, // register blocking
RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking
BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk
SM = PM*PacketSize // step along M
};
Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking
Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once
Index i0 = internal::first_default_aligned(A,m);
eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));
// handle the non aligned rows of A and C without any optimization:
for(Index i=0; i<i0; ++i)
{
for(Index j=0; j<n; ++j)
{
Scalar c = C[i+j*ldc];
for(Index k=0; k<d; ++k)
c += B[k+j*ldb] * A[i+k*lda];
C[i+j*ldc] = c;
}
}
// process the remaining rows per chunk of BM rows
for(Index ib=i0; ib<m; ib+=BM)
{
Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows
Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling
Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization
// Let's process two columns of B-C at once
for(Index j=0; j<n_end; j+=RN)
{
const Scalar* Bc0 = B+(j+0)*ldb;
const Scalar* Bc1 = B+(j+1)*ldb;
for(Index k=0; k<d_end; k+=RK)
{
// load and expand a RN x RK block of B
Packet b00, b10, b20, b30, b01, b11, b21, b31;
{ b00 = pset1<Packet>(Bc0[0]); }
{ b10 = pset1<Packet>(Bc0[1]); }
if(RK==4) { b20 = pset1<Packet>(Bc0[2]); }
if(RK==4) { b30 = pset1<Packet>(Bc0[3]); }
{ b01 = pset1<Packet>(Bc1[0]); }
{ b11 = pset1<Packet>(Bc1[1]); }
if(RK==4) { b21 = pset1<Packet>(Bc1[2]); }
if(RK==4) { b31 = pset1<Packet>(Bc1[3]); }
Packet a0, a1, a2, a3, c0, c1, t0, t1;
const Scalar* A0 = A+ib+(k+0)*lda;
const Scalar* A1 = A+ib+(k+1)*lda;
const Scalar* A2 = A+ib+(k+2)*lda;
const Scalar* A3 = A+ib+(k+3)*lda;
Scalar* C0 = C+ib+(j+0)*ldc;
Scalar* C1 = C+ib+(j+1)*ldc;
a0 = pload<Packet>(A0);
a1 = pload<Packet>(A1);
if(RK==4)
{
a2 = pload<Packet>(A2);
a3 = pload<Packet>(A3);
}
else
{
// workaround "may be used uninitialized in this function" warning
a2 = a3 = a0;
}
#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
#define WORK(I) \
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
c1 = pload<Packet>(C1+i+(I)*PacketSize); \
KMADD(c0, a0, b00, t0) \
KMADD(c1, a0, b01, t1) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
KMADD(c0, a1, b10, t0) \
KMADD(c1, a1, b11, t1) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
if(RK==4){ KMADD(c0, a2, b20, t0) }\
if(RK==4){ KMADD(c1, a2, b21, t1) }\
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
if(RK==4){ KMADD(c0, a3, b30, t0) }\
if(RK==4){ KMADD(c1, a3, b31, t1) }\
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
pstore(C0+i+(I)*PacketSize, c0); \
pstore(C1+i+(I)*PacketSize, c1)
// process rows of A' - C' with aggressive vectorization and peeling
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
{
EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1");
prefetch((A0+i+(5)*PacketSize));
prefetch((A1+i+(5)*PacketSize));
if(RK==4) prefetch((A2+i+(5)*PacketSize));
if(RK==4) prefetch((A3+i+(5)*PacketSize));
WORK(0);
WORK(1);
WORK(2);
WORK(3);
WORK(4);
WORK(5);
WORK(6);
WORK(7);
}
// process the remaining rows with vectorization only
for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
{
WORK(0);
}
#undef WORK
// process the remaining rows without vectorization
for(Index i=actual_b_end2; i<actual_b; ++i)
{
if(RK==4)
{
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3];
}
else
{
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1];
}
}
Bc0 += RK;
Bc1 += RK;
} // peeled loop on k
} // peeled loop on the columns j
// process the last column (we now perform a matrix-vector product)
if((n-n_end)>0)
{
const Scalar* Bc0 = B+(n-1)*ldb;
for(Index k=0; k<d_end; k+=RK)
{
// load and expand a 1 x RK block of B
Packet b00, b10, b20, b30;
b00 = pset1<Packet>(Bc0[0]);
b10 = pset1<Packet>(Bc0[1]);
if(RK==4) b20 = pset1<Packet>(Bc0[2]);
if(RK==4) b30 = pset1<Packet>(Bc0[3]);
Packet a0, a1, a2, a3, c0, t0/*, t1*/;
const Scalar* A0 = A+ib+(k+0)*lda;
const Scalar* A1 = A+ib+(k+1)*lda;
const Scalar* A2 = A+ib+(k+2)*lda;
const Scalar* A3 = A+ib+(k+3)*lda;
Scalar* C0 = C+ib+(n_end)*ldc;
a0 = pload<Packet>(A0);
a1 = pload<Packet>(A1);
if(RK==4)
{
a2 = pload<Packet>(A2);
a3 = pload<Packet>(A3);
}
else
{
// workaround "may be used uninitialized in this function" warning
a2 = a3 = a0;
}
#define WORK(I) \
c0 = pload<Packet>(C0+i+(I)*PacketSize); \
KMADD(c0, a0, b00, t0) \
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
KMADD(c0, a1, b10, t0) \
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
if(RK==4){ KMADD(c0, a2, b20, t0) }\
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
if(RK==4){ KMADD(c0, a3, b30, t0) }\
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
pstore(C0+i+(I)*PacketSize, c0);
// aggressive vectorization and peeling
for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
{
EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2");
WORK(0);
WORK(1);
WORK(2);
WORK(3);
WORK(4);
WORK(5);
WORK(6);
WORK(7);
}
// vectorization only
for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
{
WORK(0);
}
// remaining scalars
for(Index i=actual_b_end2; i<actual_b; ++i)
{
if(RK==4)
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3];
else
C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1];
}
Bc0 += RK;
#undef WORK
}
}
// process the last columns of A, corresponding to the last rows of B
Index rd = d-d_end;
if(rd>0)
{
for(Index j=0; j<n; ++j)
{
enum {
Alignment = PacketSize>1 ? Aligned : 0
};
typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector;
typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector;
if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b);
else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
+ B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b);
else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b)
+ B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b)
+ B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b);
}
}
} // blocking on the rows of A and C
}
#undef KMADD
} // namespace internal
} // namespace Eigen
#endif // EIGEN_SPARSELU_GEMM_KERNEL_H


@@ -0,0 +1,126 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* This file is a modified version of heap_relax_snode.c file in SuperLU
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_HEAP_RELAX_SNODE_H
#define SPARSELU_HEAP_RELAX_SNODE_H
namespace Eigen {
namespace internal {
/**
* \brief Identify the initial relaxed supernodes
*
* This routine is applied to a symmetric elimination tree.
* It assumes that the matrix has been reordered according to the postorder of the etree
* \param n The number of columns
* \param et elimination tree
* \param relax_columns Maximum number of columns allowed in a relaxed snode
* \param descendants Number of descendants of each node in the etree
* \param relax_end last column in a supernode
*/
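/* A supernode detected in the postordered etree is kept for the original
 * etree only when it maps back to the same number of consecutive original
 * columns; e.g. (a sketch) postorder columns {3,4,5} with inv_post giving
 * original columns {7,8,9}: k = 7, l = 9, so (l - k) == (j - snode_start)
 * == 2 and relax_end(7) = 9 is recorded.
 * \code
 * // k = min of inv_post over the snode's columns, l = inv_post(j)
 * bool same_snode = (l - k) == (j - snode_start);
 * \endcode
 */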
template <typename Scalar, typename StorageIndex>
void SparseLUImpl<Scalar,StorageIndex>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
{
// The etree may not be postordered, but it is heap-ordered
IndexVector post;
internal::treePostorder(StorageIndex(n), et, post); // Post order etree
IndexVector inv_post(n+1);
for (StorageIndex i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()
// Renumber etree in postorder
IndexVector iwork(n);
IndexVector et_save(n+1);
for (Index i = 0; i < n; ++i)
{
iwork(post(i)) = post(et(i));
}
et_save = et; // Save the original etree
et = iwork;
// compute the number of descendants of each node in the etree
relax_end.setConstant(emptyIdxLU);
Index j, parent;
descendants.setZero();
for (j = 0; j < n; j++)
{
parent = et(j);
if (parent != n) // not the dummy root
descendants(parent) += descendants(j) + 1;
}
// Identify the relaxed supernodes by postorder traversal of the etree
Index snode_start; // beginning of a snode
StorageIndex k;
Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
Index nsuper_et = 0; // Number of relaxed snodes in the original etree
StorageIndex l;
for (j = 0; j < n; )
{
parent = et(j);
snode_start = j;
while ( parent != n && descendants(parent) < relax_columns )
{
j = parent;
parent = et(j);
}
// Found a supernode in postordered etree, j is the last column
++nsuper_et_post;
k = StorageIndex(n);
for (Index i = snode_start; i <= j; ++i)
k = (std::min)(k, inv_post(i));
l = inv_post(j);
if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode
{
// This is also a supernode in the original etree
relax_end(k) = l; // Record last column
++nsuper_et;
}
else
{
for (Index i = snode_start; i <= j; ++i)
{
l = inv_post(i);
if (descendants(i) == 0)
{
relax_end(l) = l;
++nsuper_et;
}
}
}
j++;
// Search for a new leaf
while (j < n && descendants(j) != 0) j++; // check the bound first to avoid reading past the end
} // End postorder traversal of the etree
// Recover the original etree
et = et_save;
}
} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_HEAP_RELAX_SNODE_H


@@ -0,0 +1,130 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef SPARSELU_KERNEL_BMOD_H
#define SPARSELU_KERNEL_BMOD_H
namespace Eigen {
namespace internal {
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
/** \internal
* \brief Performs numeric block updates from a given supernode to a single column
*
* \param segsize Size of the segment (and blocks) to use for updates
* \param[in,out] dense Packed values of the original matrix
* \param tempv temporary vector to use for updates
* \param lusup array containing the supernodes
* \param lda Leading dimension in the supernode
* \param nrow Number of rows in the rectangular part of the supernode
* \param lsub compressed row subscripts of supernodes
* \param lptr pointer to the first column of the current supernode in lsub
* \param no_zeros Number of nonzero elements before the diagonal part of the supernode
*/
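/* In dense Eigen terms, the per-segment update is a unit-lower triangular
 * solve followed by a matrix-vector product (a sketch; A is the
 * segsize x segsize triangle of the supernode, B its nrow x segsize
 * rectangular part, u the gathered U-segment):
 * \code
 * u = A.template triangularView<UnitLower>().solve(u);
 * l.noalias() = B * u; // l is then subtracted from the dense accumulator
 * \endcode
 */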
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
};
template <int SegSizeAtCompileTime>
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
{
typedef typename ScalarVector::Scalar Scalar;
// First, copy U[*,j] segment from dense(*) to tempv(*)
// The result of triangular solve is in tempv[*];
// The result of the matrix-vector update is in dense[*]
Index isub = lptr + no_zeros;
Index i;
Index irow;
for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
{
irow = lsub(isub);
tempv(i) = dense(irow);
++isub;
}
// Dense triangular solve -- start effective triangle
luptr += lda * no_zeros + no_zeros;
// Form Eigen matrix and vector
Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
u = A.template triangularView<UnitLower>().solve(u);
// Dense matrix-vector product y <-- B*x
luptr += segsize;
const Index PacketSize = internal::packet_traits<Scalar>::size;
Index ldl = internal::first_multiple(nrow, PacketSize);
Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
Index aligned_offset = internal::first_default_aligned(tempv.data()+segsize, PacketSize);
Index aligned_with_B_offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize))%PacketSize;
Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
l.setZero();
internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
// Scatter tempv[] into SPA dense[] as a temporary storage
isub = lptr + no_zeros;
for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
{
irow = lsub(isub++);
dense(irow) = tempv(i);
}
// Scatter l into SPA dense[]
for (i = 0; i < nrow; i++)
{
irow = lsub(isub++);
dense(irow) -= l(i);
}
}
template <> struct LU_kernel_bmod<1>
{
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
};
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
{
typedef typename ScalarVector::Scalar Scalar;
typedef typename IndexVector::Scalar StorageIndex;
Scalar f = dense(lsub(lptr + no_zeros));
luptr += lda * no_zeros + no_zeros + 1;
const Scalar* a(lusup.data() + luptr);
const StorageIndex* irow(lsub.data()+lptr + no_zeros + 1);
Index i = 0;
for (; i+1 < nrow; i+=2)
{
Index i0 = *(irow++);
Index i1 = *(irow++);
Scalar a0 = *(a++);
Scalar a1 = *(a++);
Scalar d0 = dense.coeff(i0);
Scalar d1 = dense.coeff(i1);
d0 -= f*a0;
d1 -= f*a1;
dense.coeffRef(i0) = d0;
dense.coeffRef(i1) = d1;
}
if(i<nrow)
dense.coeffRef(*(irow++)) -= f * *(a++);
}
} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_KERNEL_BMOD_H


@@ -0,0 +1,223 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_PANEL_BMOD_H
#define SPARSELU_PANEL_BMOD_H
namespace Eigen {
namespace internal {
/**
* \brief Performs numeric block updates (sup-panel) in topological order.
*
* Before entering this routine, the original nonzeros in the panel
* were already copied into the spa[m,w]
*
* \param m number of rows in the matrix
* \param w Panel size
* \param jcol Starting column of the panel
* \param nseg Number of segments in the U part
* \param dense Store the full representation of the panel
* \param tempv working array
* \param segrep segment representative... first row in the segment
* \param repfnz First nonzero rows
* \param glu Global LU data.
*
*
*/
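/* The dense core below is a gather / triangular-solve / GEMM / scatter
 * sequence; in outline (a sketch of the steps, using the mapped views
 * defined in the body):
 * \code
 * // gather the panel's U-segments into the working matrix U
 * U = A.template triangularView<UnitLower>().solve(U); // A: supernode triangle
 * L.setZero();
 * // accumulate L += B * U via internal::sparselu_gemm, then scatter U and L
 * // back into the per-column dense accumulators
 * \endcode
 */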
template <typename Scalar, typename StorageIndex>
void SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w, const Index jcol,
const Index nseg, ScalarVector& dense, ScalarVector& tempv,
IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
{
Index ksub,jj,nextl_col;
Index fsupc, nsupc, nsupr, nrow;
Index krep, kfnz;
Index lptr; // points to the row subscripts of a supernode
Index luptr; // points to the numerical values of the supernode
Index segsize,no_zeros ;
// For each nonz supernode segment of U[*,j] in topological order
Index k = nseg - 1;
const Index PacketSize = internal::packet_traits<Scalar>::size;
for (ksub = 0; ksub < nseg; ksub++)
{ // For each updating supernode
/* krep = representative of current k-th supernode
* fsupc = first supernodal column
* nsupc = number of columns in a supernode
* nsupr = number of rows in a supernode
*/
krep = segrep(k); k--;
fsupc = glu.xsup(glu.supno(krep));
nsupc = krep - fsupc + 1;
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
nrow = nsupr - nsupc;
lptr = glu.xlsub(fsupc);
// loop over the panel columns to detect the actual number of columns and rows
Index u_rows = 0;
Index u_cols = 0;
for (jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj-jcol) * m;
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
kfnz = repfnz_col(krep);
if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
u_cols++;
u_rows = (std::max)(segsize,u_rows);
}
if(nsupc >= 2)
{
Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
Map<ScalarMatrix, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
// gather U
Index u_col = 0;
for (jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj-jcol) * m;
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
luptr = glu.xlusup(fsupc);
no_zeros = kfnz - fsupc;
Index isub = lptr + no_zeros;
Index off = u_rows-segsize;
for (Index i = 0; i < off; i++) U(i,u_col) = 0;
for (Index i = 0; i < segsize; i++)
{
Index irow = glu.lsub(isub);
U(i+off,u_col) = dense_col(irow);
++isub;
}
u_col++;
}
// solve U = A^-1 U
luptr = glu.xlusup(fsupc);
Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
no_zeros = (krep - u_rows + 1) - fsupc;
luptr += lda * no_zeros + no_zeros;
MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
U = A.template triangularView<UnitLower>().solve(U);
// update
luptr += u_rows;
MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
L.setZero();
internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());
// scatter U and L
u_col = 0;
for (jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj-jcol) * m;
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
no_zeros = kfnz - fsupc;
Index isub = lptr + no_zeros;
Index off = u_rows-segsize;
for (Index i = 0; i < segsize; i++)
{
Index irow = glu.lsub(isub++);
dense_col(irow) = U.coeff(i+off,u_col);
U.coeffRef(i+off,u_col) = 0;
}
// Scatter l into SPA dense[]
for (Index i = 0; i < nrow; i++)
{
Index irow = glu.lsub(isub++);
dense_col(irow) -= L.coeff(i,u_col);
L.coeffRef(i,u_col) = 0;
}
u_col++;
}
}
else // level 2 only
{
// Sequence through each column in the panel
for (jj = jcol; jj < jcol + w; jj++)
{
nextl_col = (jj-jcol) * m;
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
kfnz = repfnz_col(krep);
if ( kfnz == emptyIdxLU )
continue; // skip any zero segment
segsize = krep - kfnz + 1;
luptr = glu.xlusup(fsupc);
Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
// Perform a triangular solve and block update,
// then scatter the result of sup-col update to dense[]
no_zeros = kfnz - fsupc;
if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
} // End for each column in the panel
}
} // End for each updating supernode
} // end panel bmod
} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PANEL_BMOD_H


@@ -0,0 +1,258 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_PANEL_DFS_H
#define SPARSELU_PANEL_DFS_H
namespace Eigen {
namespace internal {
template<typename IndexVector>
struct panel_dfs_traits
{
typedef typename IndexVector::Scalar StorageIndex;
panel_dfs_traits(Index jcol, StorageIndex* marker)
: m_jcol(jcol), m_marker(marker)
{}
bool update_segrep(Index krep, StorageIndex jj)
{
if(m_marker[krep]<m_jcol)
{
m_marker[krep] = jj;
return true;
}
return false;
}
void mem_expand(IndexVector& /*glu.lsub*/, Index /*nextl*/, Index /*chmark*/) {}
enum { ExpandMem = false };
Index m_jcol;
StorageIndex* m_marker;
};
template <typename Scalar, typename StorageIndex>
template <typename Traits>
void SparseLUImpl<Scalar,StorageIndex>::dfs_kernel(const StorageIndex jj, IndexVector& perm_r,
Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
IndexVector& xplore, GlobalLU_t& glu,
Index& nextl_col, Index krow, Traits& traits
)
{
StorageIndex kmark = marker(krow);
// For each unmarked krow of jj
marker(krow) = jj;
StorageIndex kperm = perm_r(krow);
if (kperm == emptyIdxLU ) {
// krow is in L : place it in structure of L(*, jj)
panel_lsub(nextl_col++) = StorageIndex(krow); // krow is indexed into A
traits.mem_expand(panel_lsub, nextl_col, kmark);
}
else
{
// krow is in U : if its supernode-representative krep
// has been explored, update repfnz(*)
// krep = supernode representative of the current row
StorageIndex krep = glu.xsup(glu.supno(kperm)+1) - 1;
// First nonzero element in the current column:
StorageIndex myfnz = repfnz_col(krep);
if (myfnz != emptyIdxLU )
{
// Representative visited before
if (myfnz > kperm ) repfnz_col(krep) = kperm;
}
else
{
// Otherwise, perform dfs starting at krep
StorageIndex oldrep = emptyIdxLU;
parent(krep) = oldrep;
repfnz_col(krep) = kperm;
StorageIndex xdfs = glu.xlsub(krep);
Index maxdfs = xprune(krep);
StorageIndex kpar;
do
{
// For each unmarked kchild of krep
while (xdfs < maxdfs)
{
StorageIndex kchild = glu.lsub(xdfs);
xdfs++;
StorageIndex chmark = marker(kchild);
if (chmark != jj )
{
marker(kchild) = jj;
StorageIndex chperm = perm_r(kchild);
if (chperm == emptyIdxLU)
{
// case kchild is in L: place it in L(*, j)
panel_lsub(nextl_col++) = kchild;
traits.mem_expand(panel_lsub, nextl_col, chmark);
}
else
{
// case kchild is in U :
// chrep = its supernode-rep. If its rep has been explored,
// update its repfnz(*)
StorageIndex chrep = glu.xsup(glu.supno(chperm)+1) - 1;
myfnz = repfnz_col(chrep);
if (myfnz != emptyIdxLU)
{ // Visited before
if (myfnz > chperm)
repfnz_col(chrep) = chperm;
}
else
{ // Cont. dfs at snode-rep of kchild
xplore(krep) = xdfs;
oldrep = krep;
krep = chrep; // Go deeper down G(L)
parent(krep) = oldrep;
repfnz_col(krep) = chperm;
xdfs = glu.xlsub(krep);
maxdfs = xprune(krep);
} // end if myfnz != -1
} // end if chperm == -1
} // end if chmark !=jj
} // end while xdfs < maxdfs
// krow has no more unexplored neighbors:
// Place snode-rep krep in postorder DFS, if this
// segment is seen for the first time. (Note that
// "repfnz(krep)" may change later.)
// Backtrack dfs to its parent
if(traits.update_segrep(krep,jj))
//if (marker1(krep) < jcol )
{
segrep(nseg) = krep;
++nseg;
//marker1(krep) = jj;
}
kpar = parent(krep); // Pop recursion, mimic recursion
if (kpar == emptyIdxLU)
break; // dfs done
krep = kpar;
xdfs = xplore(krep);
maxdfs = xprune(krep);
} while (kpar != emptyIdxLU); // Do until empty stack
} // end if (myfnz == -1)
} // end if (kperm == -1)
}
/**
* \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w)
*
* A supernode representative is the last column of a supernode.
* The nonzeros in U[*,j] are segments that end at supernodes representatives
*
* The routine returns a list of the supernodal representatives
* in topological order of the dfs that generates them. This list is
* a superset of the topological order of each individual column within
* the panel.
* The location of the first nonzero in each supernodal segment
* (supernodal entry location) is also returned. Each column has
* a separate list for this purpose.
*
* Two markers arrays are used for dfs :
* marker[i] == jj, if i was visited during dfs of current column jj;
* marker1[i] >= jcol, if i was visited by earlier columns in this panel;
*
* \param[in] m number of rows in the matrix
* \param[in] w Panel size
* \param[in] jcol Starting column of the panel
* \param[in] A Input matrix in column-major storage
* \param[in] perm_r Row permutation
* \param[out] nseg Number of U segments
* \param[out] dense Accumulate the column vectors of the panel
* \param[out] panel_lsub Subscripts of the rows in the panel
* \param[out] segrep Segment representative, i.e. the first nonzero row of each segment
* \param[out] repfnz First nonzero location in each row
* \param[out] xprune The pruned elimination tree
* \param[out] marker work vector
* \param parent The elimination tree
* \param xplore work vector
* \param glu The global data structure
*
*/
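/* The marker arrays implement the usual stamping idiom: an entry is
 * "visited for column jj" iff marker(i) == jj, so no per-column reset is
 * needed. A self-contained sketch (visit() and nonzeros are hypothetical):
 * \code
 * std::vector<int> marker(m, -1);
 * for (int jj = 0; jj < ncols; ++jj)
 *   for (int krow : nonzeros[jj])
 *     if (marker[krow] != jj) { marker[krow] = jj; visit(krow); }
 * \endcode
 */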
template <typename Scalar, typename StorageIndex>
void SparseLUImpl<Scalar,StorageIndex>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
{
Index nextl_col; // Next available position in panel_lsub[*,jj]
// Initialize pointers
VectorBlock<IndexVector> marker1(marker, m, m);
nseg = 0;
panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
// For each column in the panel
for (StorageIndex jj = StorageIndex(jcol); jj < jcol + w; jj++)
{
nextl_col = (jj - jcol) * m;
VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row
VectorBlock<ScalarVector> dense_col(dense,nextl_col, m); // Accumulate a column vector here
// For each nnz in A[*, jj] do depth first search
for (typename MatrixType::InnerIterator it(A, jj); it; ++it)
{
Index krow = it.row();
dense_col(krow) = it.value();
StorageIndex kmark = marker(krow);
if (kmark == jj)
continue; // krow visited before, go to the next nonzero
dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,
xplore, glu, nextl_col, krow, traits);
}// end for nonzeros in column jj
} // end for column jj
}
} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PANEL_DFS_H


@@ -0,0 +1,137 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of xpivotL.c file in SuperLU
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_PIVOTL_H
#define SPARSELU_PIVOTL_H
namespace Eigen {
namespace internal {
/**
* \brief Performs the numerical pivoting on the current column of L, and the CDIV operation.
*
* Pivot policy :
* (1) Compute thresh = u * max_(i>=j) abs(A_ij);
* (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
* pivot row = k;
* ELSE IF abs(A_jj) >= thresh THEN
* pivot row = j;
* ELSE
* pivot row = m;
*
* Note: If you absolutely want to use a given pivot order, then set u=0.0.
*
* \param jcol The current column of L
* \param diagpivotthresh diagonal pivoting threshold
* \param[in,out] perm_r Row permutation (threshold pivoting)
* \param[in] iperm_c column permutation - used to find the diagonal of Pc*A*Pc'
* \param[out] pivrow The pivot row
* \param glu Global LU data
* \return 0 if success, i > 0 if U(i,i) is exactly zero
*
*/
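/* A worked instance of the policy above (a sketch): with column magnitudes
 * {0.1, 4.0, 2.0}, pivmax = 4.0; for u = 0.5, thresh = 2.0, so a diagonal
 * entry of magnitude 2.0 is retained as pivot even though it is not the
 * largest:
 * \code
 * double u = 0.5, pivmax = 4.0;
 * double thresh = u * pivmax;             // 2.0
 * bool diag_ok = std::abs(2.0) >= thresh; // true -> pivot row = j
 * \endcode
 */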
template <typename Scalar, typename StorageIndex>
Index SparseLUImpl<Scalar,StorageIndex>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu)
{
Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol
Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0
Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion
Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode
Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); // leading dimension
Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode
Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode
StorageIndex* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode
// Determine the largest abs numerical value for partial pivoting
Index diagind = iperm_c(jcol); // diagonal index
RealScalar pivmax(-1.0);
Index pivptr = nsupc;
Index diag = emptyIdxLU;
RealScalar rtemp;
Index isub, icol, itemp, k;
for (isub = nsupc; isub < nsupr; ++isub) {
using std::abs;
rtemp = abs(lu_col_ptr[isub]);
if (rtemp > pivmax) {
pivmax = rtemp;
pivptr = isub;
}
if (lsub_ptr[isub] == diagind) diag = isub;
}
// Test for singularity
if ( pivmax <= RealScalar(0.0) ) {
// if pivmax == -1, the column is structurally empty, otherwise it is only numerically zero
pivrow = pivmax < RealScalar(0.0) ? diagind : lsub_ptr[pivptr];
perm_r(pivrow) = StorageIndex(jcol);
return (jcol+1);
}
RealScalar thresh = diagpivotthresh * pivmax;
// Choose appropriate pivotal element
{
// Test if the diagonal element can be used as a pivot (given the threshold value)
if (diag >= 0 )
{
// Diagonal element exists
using std::abs;
rtemp = abs(lu_col_ptr[diag]);
if (rtemp != RealScalar(0.0) && rtemp >= thresh) pivptr = diag;
}
pivrow = lsub_ptr[pivptr];
}
// Record pivot row
perm_r(pivrow) = StorageIndex(jcol);
// Interchange row subscripts
if (pivptr != nsupc )
{
std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] );
// Interchange numerical values as well, for the two rows in the whole snode
// such that L is indexed the same way as A
for (icol = 0; icol <= nsupc; icol++)
{
itemp = pivptr + icol * lda;
std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * lda]);
}
}
// cdiv operations
Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc];
for (k = nsupc+1; k < nsupr; k++)
lu_col_ptr[k] *= temp;
return 0;
}
} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PIVOTL_H


@@ -0,0 +1,136 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_PRUNEL_H
#define SPARSELU_PRUNEL_H
namespace Eigen {
namespace internal {
/**
* \brief Prunes the L-structure.
*
* It prunes the L-structure of those supernodes whose structure contains the current pivot row "pivrow"
*
*
* \param jcol The current column of L
* \param[in] perm_r Row permutation
* \param[in] pivrow The pivot row
* \param nseg Number of segments
* \param segrep Segment representatives
* \param repfnz First nonzero location in each row
* \param[out] xprune
* \param glu Global LU data
*
*/
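/* The pruning below compacts a subscript range with a quicksort-style
 * two-pointer partition; a standalone sketch (assuming -1 marks a row not
 * yet pivoted, as emptyIdxLU does here):
 * \code
 * #include <algorithm>
 * #include <vector>
 * void partition_rows(std::vector<int>& rows, const std::vector<int>& perm_r)
 * {
 *   int kmin = 0, kmax = int(rows.size()) - 1;
 *   while (kmin <= kmax) {
 *     if (perm_r[rows[kmax]] == -1)      --kmax; // unpivoted: stays at the back
 *     else if (perm_r[rows[kmin]] != -1) ++kmin; // pivoted: stays at the front
 *     else std::swap(rows[kmin++], rows[kmax--]);
 *   }
 * }
 * \endcode
 */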
template <typename Scalar, typename StorageIndex>
void SparseLUImpl<Scalar,StorageIndex>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg,
const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
{
// For each supernode-rep irep in U(*,j]
Index jsupno = glu.supno(jcol);
Index i,irep,irep1;
bool movnum, do_prune = false;
Index kmin = 0, kmax = 0, minloc, maxloc,krow;
for (i = 0; i < nseg; i++)
{
irep = segrep(i);
irep1 = irep + 1;
do_prune = false;
// Don't prune with a zero U-segment
if (repfnz(irep) == emptyIdxLU) continue;
// If a snode overlaps with the next panel, then the U-segment
// is fragmented into two parts -- irep and irep1. We should let
// pruning occur at the rep-column in irep1's snode.
if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune
// If it has not been pruned & it has a nonz in row L(pivrow,i)
if (glu.supno(irep) != jsupno )
{
if ( xprune (irep) >= glu.xlsub(irep1) )
{
kmin = glu.xlsub(irep);
kmax = glu.xlsub(irep1) - 1;
for (krow = kmin; krow <= kmax; krow++)
{
if (glu.lsub(krow) == pivrow)
{
do_prune = true;
break;
}
}
}
if (do_prune)
{
// do a quicksort-type partition
// movnum=true means that the num values have to be exchanged
movnum = false;
if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1
movnum = true;
while (kmin <= kmax)
{
if (perm_r(glu.lsub(kmax)) == emptyIdxLU)
kmax--;
else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU)
kmin++;
else
{
// kmin below pivrow (not yet pivoted), and kmax
// above pivrow: interchange the two subscripts
std::swap(glu.lsub(kmin), glu.lsub(kmax));
// If the supernode has only one column, then we
// only keep one set of subscripts. For any subscript
// interchange performed, a similar interchange must be
// done on the numerical values.
if (movnum)
{
minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) );
maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) );
std::swap(glu.lusup(minloc), glu.lusup(maxloc));
}
kmin++;
kmax--;
}
} // end while
xprune(irep) = StorageIndex(kmin); //Pruning
} // end if do_prune
} // end pruning
} // End for each U-segment
}
} // end namespace internal
} // end namespace Eigen
#endif // SPARSELU_PRUNEL_H


@@ -0,0 +1,83 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* This file is a modified version of heap_relax_snode.c file in SuperLU
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_RELAX_SNODE_H
#define SPARSELU_RELAX_SNODE_H
namespace Eigen {
namespace internal {
/**
* \brief Identify the initial relaxed supernodes
*
* This routine is applied to a column elimination tree.
* It assumes that the matrix has been reordered according to the postorder of the etree
* \param n the number of columns
* \param et elimination tree
* \param relax_columns Maximum number of columns allowed in a relaxed snode
* \param descendants Number of descendants of each node in the etree
* \param relax_end last column in a supernode
*/
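/* The descendant counts are accumulated in one bottom-up pass over the
 * postordered etree (children precede their parents); a standalone sketch,
 * with et[j] == n marking the dummy root:
 * \code
 * std::vector<int> descendants(n, 0);
 * for (int j = 0; j < n; ++j)
 *   if (et[j] != n)
 *     descendants[et[j]] += descendants[j] + 1;
 * \endcode
 */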
template <typename Scalar, typename StorageIndex>
void SparseLUImpl<Scalar,StorageIndex>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
{
// compute the number of descendants of each node in the etree
Index parent;
relax_end.setConstant(emptyIdxLU);
descendants.setZero();
for (Index j = 0; j < n; j++)
{
parent = et(j);
if (parent != n) // not the dummy root
descendants(parent) += descendants(j) + 1;
}
// Identify the relaxed supernodes by postorder traversal of the etree
Index snode_start; // beginning of a snode
for (Index j = 0; j < n; )
{
parent = et(j);
snode_start = j;
while ( parent != n && descendants(parent) < relax_columns )
{
j = parent;
parent = et(j);
}
// Found a supernode in postordered etree, j is the last column
relax_end(snode_start) = StorageIndex(j); // Record last column
j++;
// Search for a new leaf
while (j < n && descendants(j) != 0) j++; // check the bound first to avoid reading past the end
} // End postorder traversal of the etree
}
} // end namespace internal
} // end namespace Eigen
#endif