diff --git a/src/madness/tensor/clapack_fortran.h b/src/madness/tensor/clapack_fortran.h index 6f291b94ead..19d0e770a1a 100644 --- a/src/madness/tensor/clapack_fortran.h +++ b/src/madness/tensor/clapack_fortran.h @@ -79,11 +79,25 @@ # define zhegv_ zhegv #endif +# define spotrf_ spotrf +# define cpotrf_ cpotrf # define dpotrf_ dpotrf +# define zpotrf_ zpotrf + +# define sgetrf_ sgetrf +# define cgetrf_ cgetrf # define dgetrf_ dgetrf +# define zgetrf_ zgetrf + +# define sgetri_ sgetri +# define cgetri_ cgetri # define dgetri_ dgetri +# define zgetri_ zgetri +# define strsm_ strsm +# define ctrsm_ ctrsm # define dtrsm_ dtrsm +# define ztrsm_ ztrsm # define dlamch_ dlamch # define slamch_ slamch @@ -100,32 +114,39 @@ # endif #endif -extern "C" - double dlamch_(const char* mode, int modelen); +// SUBROUTINE DLAMCH( CMACH, RESULT ) + +// PURPOSE +// DLAMCH determines double precision machine parameters. extern "C" float slamch_(const char* mode, int modelen); +extern "C" + double dlamch_(const char* mode, int modelen); +// SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO ) + +// PURPOSE +// DGESVD computes the singular value decomposition (SVD) of a real +// M-by-N matrix A, optionally computing the left and/or right singular +// vectors. extern "C" void sgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, real4 *a, integer *lda, real4 *s, real4 *u, integer *ldu, real4 *vt, integer *ldvt, real4 *work, integer *lwork, integer *info, char_len jobulen, char_len jobvtlen); - extern "C" void dgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, real8 *a, integer *lda, real8 *s, real8 *u, integer *ldu, real8 *vt, integer *ldvt, real8 *work, integer *lwork, integer *info, char_len jobulen, char_len jobvtlen); - extern "C" void cgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, complex_real4 *a, integer *lda, real4 *s, complex_real4 *u, integer *ldu, complex_real4 *vt, integer *ldvt, complex_real4 *work, integer *lwork, real4 *rwork, integer *info, char_len jobulen, char_len jobvtlen); - extern "C" void zgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, complex_real8 *a, integer *lda, real8 *s, complex_real8 *u, @@ -133,42 +154,51 @@ extern "C" integer *lwork, real8 *rwork, integer *info, char_len jobulen, char_len jobvtlen); +// SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) + +// PURPOSE +// DGESV computes the solution to a real system of linear equations +// A * X = B, +// where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + extern "C" void sgesv_(integer* n, integer* nrhs, real4* AT, integer* lda, integer* piv, real4* x, integer* ldx, integer* info); - extern "C" void dgesv_(integer* n, integer* nrhs, real8* AT, integer* lda, integer* piv, real8* x, integer* ldx, integer* info); - extern "C" void cgesv_(integer* n, integer* nrhs, complex_real4* AT, integer* lda, integer* piv, complex_real4* x, integer* ldx, integer* info); - extern "C" void zgesv_(integer* n, integer* nrhs, complex_real8* AT, integer* lda, integer* piv, complex_real8* x, integer* ldx, integer* info); +// SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, WORK, LWORK, INFO ) + +// PURPOSE +// DGELSS computes the minimum norm solution to a real linear least +// squares problem: +// Minimize 2-norm(| b - A*x |) +// using the singular value decomposition (SVD) of A. A is an M-by-N +// matrix which may be rank-deficient. extern "C" void sgelss_(integer *m, integer *n, integer *nrhs, real4 *a, integer *lda, real4 *b, integer *ldb, real4 *sOUT, real4 *rcondIN, integer *rankOUT, real4 *work, integer *lwork, integer *infoOUT); - extern "C" void dgelss_(integer *m, integer *n, integer *nrhs, real8 *a, integer *lda, real8 *b, integer *ldb, real8 *sOUT, real8 *rcondIN, integer *rankOUT, real8 *work, integer *lwork, integer *infoOUT); - extern "C" void cgelss_(integer *m, integer *n, integer *nrhs, complex_real4 *a, integer *lda, complex_real4 *b, integer *ldb, real4 *sOUT, real4 *rcondIN, integer *rankOUT, complex_real4 *work, integer *lwork, real4 *rwork, integer *infoOUT); - extern "C" void zgelss_(integer *m, integer *n, integer *nrhs, complex_real8 *a, integer *lda, complex_real8 *b, integer *ldb, @@ -176,37 +206,38 @@ extern "C" real8 *rcondIN, integer *rankOUT, complex_real8 *work, integer *lwork, real8 *rwork, integer *infoOUT); +// SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, INFO ) + +// PURPOSE +// DGELS solves overdetermined or underdetermined real linear systems +// involving an M-by-N matrix A, or its transpose, using a QR or LQ +// factorization of A. It is assumed that A has full rank. + extern "C" void sgels_(const char *trans, integer *m, integer *n, integer *nrhs, real4 *a, integer *lda, real4 *b, integer *ldb, real4 *work, integer *lwork, integer *infoOUT, char_len translen); - extern "C" void dgels_(const char *trans, integer *m, integer *n, integer *nrhs, real8 *a, integer *lda, real8 *b, integer *ldb, real8 *work, integer *lwork, integer *infoOUT, char_len translen); - extern "C" void cgels_(const char *trans, integer *m, integer *n, integer *nrhs, complex_real4 *a, integer *lda, complex_real4 *b, integer *ldb, complex_real4 *work, integer *lwork, real4 *rwork, integer *infoOUT, char_len translen); - extern "C" void zgels_(const char *trans, integer *m, integer *n, integer *nrhs, complex_real8 *a, integer *lda, complex_real8 *b, integer *ldb, complex_real8 *work, integer *lwork, real8 *rwork, integer *infoOUT, char_len translen); -extern "C" - void ssyev_(const char* jobz, const char* uplo, integer *n, - real4 *a, integer *lda, real4 *w, real4 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ); +// SUBROUTINE DGGEV( JOBZ, UPLO, N, A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) -extern "C" - void dsyev_(const char* jobz, const char* uplo, integer *n, - real8 *a, integer *lda, real8 *w, real8 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ); +// PURPOSE +// DGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) +// the generalized eigenvalues, and optionally, the left and/or right +// generalized eigenvectors. extern "C" void sggev_(const char* jobz, const char* uplo, integer *n, @@ -215,7 +246,6 @@ extern "C" real4* vl, integer* ldvl, real4* vr, integer* ldvr, real4* work, integer* lwork, integer* info, char_len jobzlen, char_len uplo_len); - extern "C" void dggev_(const char* jobl, const char* jobr, integer *n, real8 *a, integer *lda, real8 *b, integer *ldb, @@ -223,14 +253,6 @@ extern "C" real8 *vl, integer *ldvl, real8 *vr, integer *ldvr, real8 *work, integer *lwork, integer *info, char_len jobzlen, char_len uplo_len); - - // void dggev_(const char* jobz, const char* uplo, integer *n, - // real8* a, integer* lda, real8* b, integer* ldb, - // real8* alphar, real8* alphai, real8* beta, - // real8* vl, integer* ldvl, real8* vr, integer* ldvr, - // real8* work, integer* lwork, integer* info, - // char_len jobzlen, char_len uplo_len); - extern "C" void cggev_(const char* jobz, const char* uplo, integer *n, complex_real4* a, integer* lda, complex_real4* b, integer* ldb, @@ -238,7 +260,6 @@ extern "C" complex_real4* vl, integer* ldvl, complex_real4* vr, integer* ldvr, complex_real4* work, integer* lwork, real4* rwork, integer* info, char_len jobzlen, char_len uplo_len); - extern "C" void zggev_(const char* jobz, const char* uplo, integer *n, complex_real8* a, integer* lda, complex_real8* b, integer* ldb, @@ -247,70 +268,92 @@ extern "C" complex_real8* work, integer* lwork, real8* rwork, integer* info, char_len jobzlen, char_len uplo_len); -extern "C" - void dgeev_(const char* jobz, const char* uplo, integer *n, - real8* a, integer* lda, real8* w_real, real8* w_imag, real8* v, integer* ldv, - real8* vr, integer* ldvr, real8* work, integer* lwork, integer* info, - char_len jobzlen, char_len uplo_len ); +// SUBROUTINE DGEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) + +// PURPOSE +// DGEEV computes for an N-by-N real nonsymmetric matrix A, the +// eigenvalues and, optionally, the left and/or right eigenvectors. extern "C" void sgeev_(const char* jobz, const char* uplo, integer *n, real4* a, integer* lda, real4* w_real, real4* w_imag, real4* v, integer* ldv, real4* vr, integer* ldvr, real4* work, integer* lwork, integer* info, char_len jobzlen, char_len uplo_len ); - extern "C" - void zgeev_(const char* jobz, const char* uplo, integer *n, complex_real8* a, integer* lda, - complex_real8* w, complex_real8* vl, integer* ldvl, complex_real8* vr, integer* ldvr, - complex_real8* work, integer* lwork, real8* rwork, integer* info, + void dgeev_(const char* jobz, const char* uplo, integer *n, + real8* a, integer* lda, real8* w_real, real8* w_imag, real8* v, integer* ldv, + real8* vr, integer* ldvr, real8* work, integer* lwork, integer* info, char_len jobzlen, char_len uplo_len ); - extern "C" void cgeev_(const char* jobz, const char* uplo, integer *n, complex_real4* a, integer* lda, complex_real4* w, complex_real4* vl, integer* ldvl, complex_real4* vr, integer* ldvr, complex_real4* work, integer* lwork, real4* rwork, integer* info, char_len jobzlen, char_len uplo_len ); +extern "C" + void zgeev_(const char* jobz, const char* uplo, integer *n, complex_real8* a, integer* lda, + complex_real8* w, complex_real8* vl, integer* ldvl, complex_real8* vr, integer* ldvr, + complex_real8* work, integer* lwork, real8* rwork, integer* info, + char_len jobzlen, char_len uplo_len ); +// SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +// PURPOSE +// DSYEV computes all eigenvalues and, optionally, eigenvectors of a +// real symmetric matrix A. + +extern "C" + void ssyev_(const char* jobz, const char* uplo, integer *n, + real4 *a, integer *lda, real4 *w, real4 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ); +extern "C" + void dsyev_(const char* jobz, const char* uplo, integer *n, + real8 *a, integer *lda, real8 *w, real8 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ); extern "C" void cheev_(const char* jobz, const char* uplo, integer *n, complex_real4 *a, integer *lda, real4 *w, complex_real4 *work, integer *lwork, real4 *rwork, integer *info, char_len jobzlen, char_len uplo_len ); - extern "C" void zheev_(const char* jobz, const char* uplo, integer *n, complex_real8 *a, integer *lda, real8 *w, complex_real8 *work, integer *lwork, real8 *rwork, integer *info, char_len jobzlen, char_len uplo_len ); +// SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, INFO ) +// SUBROUTINE ZHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, INFO ) + +// PURPOSE +// DSYGV computes all the eigenvalues, and optionally, the eigenvectors +// of a real generalized symmetric-definite eigenproblem, of the form +// A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. + extern "C" void ssygv_(integer *itype, const char* jobz, const char* uplo, integer *n, real4 *a, integer *lda, real4 *b, integer *ldb, real4 *w, real4 *work, integer *lwork, integer *info, char_len jobzlen, char_len uplo_len ); - extern "C" void dsygv_(integer *itype, const char* jobz, const char* uplo, integer *n, real8 *a, integer *lda, real8 *b, integer *ldb, real8 *w, real8 *work, integer *lwork, integer *info, char_len jobzlen, char_len uplo_len ); - extern "C" void chegv_(integer *itype, const char* jobz, const char* uplo, integer *n, complex_real4 *a, integer *lda, complex_real4 *b, integer *ldb, real4 *w, complex_real4 *work, integer *lwork, real4 *rwork, integer *info, char_len jobzlen, char_len uplo_len ); - extern "C" void zhegv_(integer *itype, const char* jobz, const char* uplo, integer *n, complex_real8 *a, integer *lda, complex_real8 *b, integer *ldb, real8 *w, complex_real8 *work, integer *lwork, real8 *rwork, integer *info, char_len jobzlen, char_len uplo_len ); -// dgeqrf (M, N, A, LDA, TAU, WORK, LWORK, INFO) + +// SUBROUTINE DGEQRF (M, N, A, LDA, TAU, WORK, LWORK, INFO) // -// DGEQRF computes a QR factorization of a real M-by-N matrix A: -// A = Q * R. +// PURPOSE +// DGEQRF computes a QR factorization of a real M-by-N matrix A: +// A = Q * R. extern "C" void sgeqrf_(integer *m, integer *n, @@ -332,8 +375,7 @@ extern "C" complex_real8 *a, integer *lda, complex_real8 *tau, complex_real8 *work, integer *lwork, integer *infoOUT); - -// dgeqp3(M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ); +// SUBROUTINE DGEQP3(M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ); // PURPOSE // DGEQP3 computes a QR factorization with column pivoting of a @@ -373,23 +415,25 @@ extern "C" void sorgqr_(integer *m, integer *n, integer *k, real4 *a, integer *lda, real4 *tau, real4 *work, integer *lwork, integer *info); - extern "C" void dorgqr_(integer *m, integer *n, integer *k, real8 *a, integer *lda, real8 *tau, real8 *work, integer *lwork, integer *info); - extern "C" void cungqr_(integer *m, integer *n, integer *k, complex_real4 *a, integer *lda, complex_real4 *tau, complex_real4 *work, integer *lwork, integer *info); - extern "C" void zungqr_(integer *m, integer *n, integer *k, complex_real8 *a, integer *lda, complex_real8 *tau, complex_real8 *work, integer *lwork, integer *info); -// cholesky +// SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO ) + +// PURPOSE +// computes the Cholesky factorization of a real symmetric +// positive definite matrix A. + extern "C" void spotrf_(const char *uplo, const integer* n, real4 *a, const integer *lda, integer *info, char_len uplo_len); extern "C" @@ -399,7 +443,12 @@ void cpotrf_(const char *uplo, const integer* n, complex_real4 *a, const integer extern "C" void zpotrf_(const char *uplo, const integer* n, complex_real8 *a, const integer *lda, integer *info, char_len uplo_len); -// rr_cholesky +// SUBROUTINE DPSTRF( UPLO, N, A, LDA, IPIV, RANK, TOL, WORK, INFO ) + +// PURPOSE +// DPSTRF computes the Cholesky factorization with complete +// pivoting of a real symmetric positive semidefinite matrix A. + extern "C" void spstrf_(const char *uplo, const integer* n, real4 *a, const integer *lda, integer* ipiv, integer* rank, real4* tol, real4* work, integer *info); @@ -413,28 +462,91 @@ extern "C" void zpstrf_(const char *uplo, const integer* n, complex_real8 *a, const integer *lda, integer* ipiv, integer* rank, real8* tol, complex_real8* work, integer *info); -extern "C" -void dpstrf_(const char *uplo, const integer* n, real8 *a, const integer *lda, integer* ipiv, integer* rank, real8* tol, - real8* work, integer *info); +// SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) +// PURPOSE +// DGETRF computes an LU factorization of a general M-by-N matrix A +// using partial pivoting with row interchanges. +extern "C" +void sgetrf_(const integer* m, const integer* n, real4 *a, const integer *lda, + integer* ipiv, integer *info); extern "C" void dgetrf_(const integer* m, const integer* n, real8 *a, const integer *lda, integer* ipiv, integer *info); +extern "C" +void cgetrf_(const integer* m, const integer* n, complex_real4 *a, const integer *lda, + integer* ipiv, integer *info); +extern "C" +void zgetrf_(const integer* m, const integer* n, complex_real8 *a, const integer *lda, + integer* ipiv, integer *info); + +// SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) + +// PURPOSE +// DGETRI computes the inverse of a matrix using the LU factorization +// computed by DGETRF. +extern "C" +void sgetri_(const integer* n, real4 *a, const integer *lda, const integer* ipiv, + real4 *work, const integer *lwork, integer *info); extern "C" void dgetri_(const integer* n, real8 *a, const integer *lda, const integer* ipiv, real8 *work, const integer *lwork, integer *info); +extern "C" +void cgetri_(const integer* n, complex_real4 *a, const integer *lda, const integer* ipiv, + complex_real4 *work, const integer *lwork, integer *info); +extern "C" +void zgetri_(const integer* n, complex_real8 *a, const integer *lda, const integer* ipiv, + complex_real8 *work, const integer *lwork, integer *info); + +// SUBROUTINE DTRSM( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + +// PURPOSE +// DTRSM solves one of the matrix equations +// op( A )*X = alpha*B, or X*op( A ) = alpha*B, +// where alpha is a scalar, X and B are m by n matrices, A is a unit, or +// non-unit, upper or lower triangular matrix and op( A ) is one of +// op( A ) = A or op( A ) = A**T. +extern "C" +void strsm_(const char* side, const char* uplo, const char* transa, const char* diag, + const integer* m, const integer* n, const real4* alpha, + const real4* a, const integer* lda, real4* b, const integer* ldb, + char_len sidelen, char_len uplolen, char_len transalen, char_len diaglen); extern "C" void dtrsm_(const char* side, const char* uplo, const char* transa, const char* diag, - const integer* m, const integer* n, const real8* alpha, - const real8* a, const integer* lda, real8* b, const integer* ldb, + const integer* m, const integer* n, const real8* alpha, + const real8* a, const integer* lda, real8* b, const integer* ldb, + char_len sidelen, char_len uplolen, char_len transalen, char_len diaglen); +extern "C" +void ctrsm_(const char* side, const char* uplo, const char* transa, const char* diag, + const integer* m, const integer* n, const complex_real4* alpha, + const complex_real4* a, const integer* lda, complex_real4* b, const integer* ldb, + char_len sidelen, char_len uplolen, char_len transalen, char_len diaglen); +extern "C" +void ztrsm_(const char* side, const char* uplo, const char* transa, const char* diag, + const integer* m, const integer* n, const complex_real8* alpha, + const complex_real8* a, const integer* lda, complex_real8* b, const integer* ldb, char_len sidelen, char_len uplolen, char_len transalen, char_len diaglen); -// SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +// SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) + +// PURPOSE +// DTRTRI computes the inverse of a real upper or lower triangular +// matrix A. + +extern "C" +void strtri_(const char* uplo, const char* diag, const integer* n, const real4* a, + const integer* lda, integer *info); extern "C" void dtrtri_(const char* uplo, const char* diag, const integer* n, const real8* a, const integer* lda, integer *info); +extern "C" +void ctrtri_(const char* uplo, const char* diag, const integer* n, const complex_real4* a, + const integer* lda, integer *info); +extern "C" +void ztrtri_(const char* uplo, const char* diag, const integer* n, const complex_real8* a, + const integer* lda, integer *info); #endif // MADNESS_LINALG_CLAPACK_FORTRAN_H__INCLUDED diff --git a/src/madness/tensor/lapack.cc b/src/madness/tensor/lapack.cc index 93b755c9d05..16d96a19b69 100644 --- a/src/madness/tensor/lapack.cc +++ b/src/madness/tensor/lapack.cc @@ -72,14 +72,14 @@ double tt1, ss1; # define STATIC #endif -/// These oddly-named wrappers enable the generic svd iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic svd interface to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline -void dgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, - real4 *a, integer *lda, real4 *s, real4 *u, integer *ldu, - real4 *vt, integer *ldvt, real4 *work, integer *lwork, - integer *info, char_len jobulen, char_len jobvtlen) { + +STATIC inline void gesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, + real4 *a, integer *lda, real4 *s, real4 *u, integer *ldu, + real4 *vt, integer *ldvt, real4 *work, integer *lwork, + integer *info, char_len jobulen, char_len jobvtlen) { //std::cout << "n " << *n << " m " << *m << " lwork " << *lwork << std::endl; //std::cout << " sizeof(integer) " << sizeof(integer) << std::endl; #if MADNESS_LINALG_USE_LAPACKE @@ -91,22 +91,23 @@ void dgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, #endif } +STATIC inline void gesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, + real8 *a, integer *lda, real8 *s, real8 *u, integer *ldu, + real8 *vt, integer *ldvt, real8 *work, integer *lwork, + integer *info, char_len jobulen, char_len jobvtlen){ #if MADNESS_LINALG_USE_LAPACKE -STATIC inline -void dgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, - real8 *a, integer *lda, real8 *s, real8 *u, integer *ldu, - real8 *vt, integer *ldvt, real8 *work, integer *lwork, - integer *info, char_len jobulen, char_len jobvtlen){ - dgesvd_(jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, work, lwork, info); + dgesvd_(jobu, jobvt, m, n, a, lda, s, u, ldu, + vt, ldvt, work, lwork, info); +#else + dgesvd_(jobu, jobvt, m, n, a, lda, s, u, ldu, + vt, ldvt, work, lwork, info, jobulen, jobvtlen); +#endif } -#endif -STATIC inline -void dgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, - complex_real4 *a, integer *lda, real4 *s, complex_real4 *u, integer *ldu, - complex_real4 *vt, integer *ldvt, complex_real4 *work, integer *lwork, - integer *info, char_len jobulen, char_len jobvtlen) { +STATIC inline void gesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, + complex_real4 *a, integer *lda, real4 *s, complex_real4 *u, integer *ldu, + complex_real4 *vt, integer *ldvt, complex_real4 *work, integer *lwork, + integer *info, char_len jobulen, char_len jobvtlen) { Tensor rwork(5*min(*m,*n)); #if MADNESS_LINALG_USE_LAPACKE cgesvd_(jobu, jobvt, m, n, to_cptr(a), lda, s, to_cptr(u), ldu, @@ -117,11 +118,10 @@ void dgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, #endif } -STATIC inline -void dgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, - complex_real8 *a, integer *lda, real8 *s, complex_real8 *u, integer *ldu, - complex_real8 *vt, integer *ldvt, complex_real8 *work, integer *lwork, - integer *info, char_len jobulen, char_len jobvtlen) { +STATIC inline void gesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, + complex_real8 *a, integer *lda, real8 *s, complex_real8 *u, integer *ldu, + complex_real8 *vt, integer *ldvt, complex_real8 *work, integer *lwork, + integer *info, char_len jobulen, char_len jobvtlen) { Tensor rwork(5*min(*m,*n)); #if MADNESS_LINALG_USE_LAPACKE zgesvd_(jobu, jobvt, m, n, to_zptr(a), lda, s, to_zptr(u), ldu, @@ -132,277 +132,312 @@ void dgesvd_(const char *jobu, const char *jobvt, integer *m, integer *n, #endif } -/// These oddly-named wrappers enable the generic cholesky iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic gesv interface to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline -void potrf_(const char * UPLO,integer *n, real4 *a ,integer *lda , integer *info){ -#if MADNESS_LINALG_USE_LAPACKE - spotrf_(UPLO, n, a, lda, info); -#else - spotrf_(UPLO, n, a, lda, info, 1); -#endif + +STATIC inline void gesv_(integer* n, integer* nrhs, real4* AT, integer* lda, + integer* piv, real4* x, integer* ldx, integer* info) { + sgesv_(n, nrhs, AT, lda, piv, x, ldx, info); } -STATIC inline -void potrf_(const char * UPLO,integer *n, real8 *a ,integer *lda , integer *info){ -#if MADNESS_LINALG_USE_LAPACKE - dpotrf_(UPLO, n, a, lda, info); -#else - dpotrf_(UPLO, n, a, lda, info, 1); -#endif + +STATIC inline void gesv_(integer* n, integer* nrhs, real8* AT, integer* lda, + integer* piv, real8* x, integer* ldx, integer* info) { + dgesv_(n, nrhs, AT, lda, piv, x, ldx, info); } -STATIC inline -void potrf_(const char * UPLO,integer *n, complex_real4 *a ,integer *lda , integer *info){ + +STATIC inline void gesv_(integer* n, integer* nrhs, float_complex* AT, integer* lda, + integer* piv, float_complex* x, integer* ldx, integer* info) { #if MADNESS_LINALG_USE_LAPACKE - cpotrf_(UPLO, n, to_cptr(a), lda, info); + cgesv_(n, nrhs, to_cptr(AT), lda, piv, to_cptr(x), ldx, info); #else - cpotrf_(UPLO, n, a, lda, info, 1); + cgesv_(n, nrhs, AT, lda, piv, x, ldx, info); #endif } -STATIC inline -void potrf_(const char * UPLO,integer *n, complex_real8 *a ,integer *lda , integer *info){ + +STATIC inline void gesv_(integer* n, integer* nrhs, double_complex* AT, integer* lda, + integer* piv, double_complex* x, integer* ldx, integer* info) { #if MADNESS_LINALG_USE_LAPACKE - zpotrf_(UPLO, n, to_zptr(a), lda, info); + zgesv_(n, nrhs, to_zptr(AT), lda, piv, to_zptr(x), ldx, info); #else - zpotrf_(UPLO, n, a, lda, info, 1); + zgesv_(n, nrhs, AT, lda, piv, x, ldx, info); #endif } -/// These oddly-named wrappers enable the generic rr_cholesky iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic gelss interface to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline -void pstrf_(const char * UPLO,integer *n, real4 *a ,integer* lda, integer *piv, integer* rank, real4* tol, real4* work , integer *info){ -#if MADNESS_LINALG_USE_LAPACKE - spstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); -#else - spstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); -#endif + +STATIC inline void gelss_(integer *m, integer *n, integer *nrhs, + real4 *a, integer *lda, real4 *b, integer *ldb, real4 *sOUT, + real4 *rcondIN, integer *rankOUT, real4 *work, + integer *lwork, integer *infoOUT) { + sgelss_(m, n, nrhs, a, lda, b, ldb, sOUT, rcondIN, rankOUT, work, lwork, infoOUT); } -STATIC inline -void pstrf_(const char * UPLO,integer *n, real8 *a ,integer* lda, integer *piv, integer* rank, real8* tol, real8* work , integer *info){ + +STATIC inline void gelss_(integer *m, integer *n, integer *nrhs, + real8 *a, integer *lda, real8 *b, integer *ldb, real8 *sOUT, + real8 *rcondIN, integer *rankOUT, real8 *work, + integer *lwork, integer *infoOUT) { + dgelss_(m, n, nrhs, a, lda, b, ldb, sOUT, rcondIN, rankOUT, work, lwork, infoOUT); +} + +STATIC inline void gelss_(integer *m, integer *n, integer *nrhs, + float_complex *a, integer *lda, float_complex *b, + integer *ldb, float *sOUT, + float *rcondIN, integer *rankOUT, float_complex *work, + integer *lwork, integer *infoOUT) { + Tensor rwork((5*min(*m,*n))); #if MADNESS_LINALG_USE_LAPACKE - dpstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); + cgelss_(m, n, nrhs, to_cptr(a), lda, to_cptr(b), ldb, sOUT, + rcondIN, rankOUT, to_cptr(work), lwork, rwork.ptr(), infoOUT); #else - dpstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); + cgelss_(m, n, nrhs, a, lda, b, ldb, sOUT, rcondIN, rankOUT, work, + lwork, rwork.ptr(), infoOUT); #endif } -STATIC inline -void pstrf_(const char * UPLO,integer *n, complex_real4 *a ,integer* lda, integer *piv, integer* rank, real4* tol, complex_real4* work , integer *info){ + +STATIC inline void gelss_(integer *m, integer *n, integer *nrhs, + double_complex *a, integer *lda, double_complex *b, + integer *ldb, double *sOUT, + double *rcondIN, integer *rankOUT, double_complex *work, + integer *lwork, integer *infoOUT) { + Tensor rwork((5*min(*m,*n))); #if MADNESS_LINALG_USE_LAPACKE - cpstrf_(UPLO, n, to_cptr(a), lda, piv, rank, tol, reinterpret_cast(work), info); + zgelss_(m, n, nrhs, to_zptr(a), lda, to_zptr(b), ldb, sOUT, + rcondIN, rankOUT, to_zptr(work), lwork, rwork.ptr(), infoOUT); #else - cpstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); + zgelss_(m, n, nrhs, a, lda, b, ldb, sOUT, rcondIN, rankOUT, work, + lwork, rwork.ptr(), infoOUT); #endif } -STATIC inline -void pstrf_(const char * UPLO,integer *n, complex_real8 *a ,integer* lda, integer *piv, integer* rank, real8* tol, complex_real8* work , integer *info){ + +/// These oddly-named wrappers enable the generic ggev interface to get +/// the correct LAPACK routine based upon the argument type. Internal +/// use only. + +STATIC inline void ggev_(const char* jobl, const char* jobr, integer *n, + real4 *a, integer *lda, real4 *b, integer *ldb, + real4 *w_real, real4 *w_imag, real4 *beta, + real4 *vl, integer *ldvl, real4 *vr, integer *ldvr, + real4 *work, integer *lwork, integer *info, + char_len jobzlen, char_len uplo_len) { #if MADNESS_LINALG_USE_LAPACKE - //zpstrf_(UPLO, n, to_zptr(a), lda, piv, rank, tol, to_cptr(work), info); - zpstrf_(UPLO, n, to_zptr(a), lda, piv, rank, tol, reinterpret_cast(work), info); + sggev_(jobl, jobr, n, a, lda, b, ldb, w_real, w_imag, beta, vl, ldvl, vr, ldvr, work, lwork, info); #else - zpstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); + sggev_(jobl, jobr, n, a, lda, b, ldb, w_real, w_imag, beta, vl, ldvl, vr, ldvr, work, lwork, info, + jobzlen, uplo_len); #endif } -/// These oddly-named wrappers enable the generic geqrf iterface to get -/// the correct LAPACK routine based upon the argument type. Internal -/// use only. -STATIC inline -void dgeqrf_(integer *m, integer *n, - real4 *a, integer *lda, real4 *tau, - real4 *work, integer *lwork, integer *infoOUT) { +STATIC inline void ggev_(const char* jobl, const char* jobr, integer *n, + real8 *a, integer *lda, real8 *b, integer *ldb, + real8 *w_real, real8 *w_imag, real8 *beta, + real8 *vl, integer *ldvl, real8 *vr, integer *ldvr, + real8 *work, integer *lwork, integer *info, + char_len jobzlen, char_len uplo_len) { #if MADNESS_LINALG_USE_LAPACKE - sgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); + dggev_(jobl, jobr, n, a, lda, b, ldb, w_real, w_imag, beta, vl, ldvl, vr, ldvr, work, lwork, info); #else - sgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); + dggev_(jobl, jobr, n, a, lda, b, ldb, w_real, w_imag, beta, vl, ldvl, vr, ldvr, work, lwork, info, + jobzlen, uplo_len); #endif } -//STATIC inline -//void dgeqrf_(integer *m, integer *n, -// real8 *a, integer *lda, real8 *tau, -// real8 *work, integer *lwork, integer *infoOUT) { -//#if MADNESS_LINALG_USE_LAPACKE -// dgeqrf_(LAPACK_ROW_MAJOR, m, n, a, lda, tau); -//#else -// dgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); -//#endif -//} - -STATIC inline -void dgeqrf_(integer *m, integer *n, - float_complex *a, integer *lda, float_complex *tau, - float_complex *work, integer *lwork, integer *infoOUT) { +STATIC inline void ggev_(const char* jobl, const char* jobr, integer *n, + complex_real4 *a, integer *lda, complex_real4 *b, integer *ldb, + complex_real4 *w, complex_real4 *w_imag, complex_real4 *beta, + complex_real4 *vl, integer *ldvl, complex_real4 *vr, integer *ldvr, + complex_real4 *work, integer *lwork, integer *info, + char_len jobzlen, char_len uplo_len) { + Tensor rwork(max((integer) 1, (integer) (2* (*n)))); #if MADNESS_LINALG_USE_LAPACKE - cgeqrf_(m, n, to_cptr(a), lda, to_cptr(tau), to_cptr(work), lwork, infoOUT); + cggev_(jobl, jobr, n, reinterpret_cast(a), lda, + reinterpret_cast(b), ldb, + reinterpret_cast(w), + reinterpret_cast(beta), + reinterpret_cast(vl), ldvl, + reinterpret_cast(vr), ldvr, + reinterpret_cast(work), lwork, rwork.ptr(), info); #else - cgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); + cggev_(jobl, jobr, n, a, lda, b, ldb, w, beta, vl, ldvl, vr, ldvr, work, lwork, rwork.ptr(), info, + jobzlen, uplo_len); + #endif } -STATIC inline -void dgeqrf_(integer *m, integer *n, - double_complex *a, integer *lda, double_complex *tau, - double_complex *work, integer *lwork, integer *infoOUT) { +STATIC inline void ggev_(const char* jobl, const char* jobr, integer *n, + complex_real8 *a, integer *lda, complex_real8 *b, integer *ldb, + complex_real8 *w, complex_real8 *w_imag, complex_real8 *beta, + complex_real8 *vl, integer *ldvl, complex_real8 *vr, integer *ldvr, + complex_real8 *work, integer *lwork, integer *info, + char_len jobzlen, char_len uplo_len) { + Tensor rwork(max((integer) 1, (integer) (2* (*n)))); #if MADNESS_LINALG_USE_LAPACKE - zgeqrf_(m, n, to_zptr(a), lda, to_zptr(tau), to_zptr(work), lwork, infoOUT); + zggev_(jobl, jobr, n, reinterpret_cast(a), lda, + reinterpret_cast(b), ldb, + reinterpret_cast(w), + reinterpret_cast(beta), + reinterpret_cast(vl), ldvl, + reinterpret_cast(vr), ldvr, + reinterpret_cast(work), lwork, rwork.ptr(), info); #else - zgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); + zggev_(jobl, jobr, n, a, lda, b, ldb, w, beta, vl, ldvl, vr, ldvr, work, lwork, rwork.ptr(), info, + jobzlen, uplo_len); #endif } - - -/// These oddly-named wrappers enable the generic gesv iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic geev interface to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline void dgesv_(integer* n, integer* nrhs, float* AT, integer* lda, - integer* piv, float* x, integer* ldx, integer* info) { - sgesv_(n, nrhs, AT, lda, piv, x, ldx, info); -} -STATIC inline void dgesv_(integer* n, integer* nrhs, float_complex* AT, integer* lda, - integer* piv, float_complex* x, integer* ldx, integer* info) { + +STATIC inline void geev_(const char* jobz, const char* uplo, integer *n, + real4 *a, integer *lda, real4 *w_real, real4 *w_imag, + real4 *v, integer *ldv, real4 *vr, integer *ldvr, + real4 *work, integer *lwork, integer *info, + char_len jobzlen, char_len uplo_len) { #if MADNESS_LINALG_USE_LAPACKE - cgesv_(n, nrhs, to_cptr(AT), lda, piv, to_cptr(x), ldx, info); + sgeev_(jobz, uplo, n, a, lda, w_real, w_imag, v, ldv, vr, ldvr, work, lwork, info); #else - cgesv_(n, nrhs, AT, lda, piv, x, ldx, info); + sgeev_(jobz, uplo, n, a, lda, w_real, w_imag, v, ldv, vr, ldvr, work, lwork, info, + jobzlen, uplo_len); #endif } -STATIC inline void dgesv_(integer* n, integer* nrhs, double_complex* AT, integer* lda, - integer* piv, double_complex* x, integer* ldx, integer* info) { + +STATIC inline void geev_(const char* jobz, const char* uplo, integer *n, + real8 *a, integer *lda, real8 *w_real, real8 *w_imag, + real8 *v, integer *ldv, real8 *vr, integer *ldvr, + real8 *work, integer *lwork, integer *info, + char_len jobzlen, char_len uplo_len) { #if MADNESS_LINALG_USE_LAPACKE - zgesv_(n, nrhs, to_zptr(AT), lda, piv, to_zptr(x), ldx, info); + dgeev_(jobz, uplo, n, a, lda, w_real, w_imag, v, ldv, vr, ldvr, work, lwork, info); #else - zgesv_(n, nrhs, AT, lda, piv, x, ldx, info); + dgeev_(jobz, uplo, n, a, lda, w_real, w_imag, v, ldv, vr, ldvr, work, lwork, info, + jobzlen, uplo_len); #endif } -/// These oddly-named wrappers enable the generic gelss iterface to get -/// the correct LAPACK routine based upon the argument type. Internal -/// use only. -STATIC inline void dgelss_(integer *m, integer *n, integer *nrhs, - float *a, integer *lda, float *b, integer *ldb, float *sOUT, - float *rcondIN, integer *rankOUT, float *work, - integer *lwork, integer *infoOUT) { - sgelss_(m, n, nrhs, a, lda, b, ldb, sOUT, rcondIN, rankOUT, work, lwork, infoOUT); -} - -STATIC inline void dgelss_(integer *m, integer *n, integer *nrhs, - float_complex *a, integer *lda, float_complex *b, - integer *ldb, float *sOUT, - float *rcondIN, integer *rankOUT, float_complex *work, - integer *lwork, integer *infoOUT) { - Tensor rwork((5*min(*m,*n))); +STATIC inline void geev_(const char* jobz, const char* uplo, integer *n, + complex_real4 *a, integer *lda, complex_real4 *w, complex_real4 *w_imag, + complex_real4 *v, integer *ldv, complex_real4 *vr, integer *ldvr, + complex_real4 *work, integer *lwork, integer *info, + char_len jobzlen, char_len uplo_len) { + Tensor rwork(max((integer) 1, (integer) (2* (*n)))); #if MADNESS_LINALG_USE_LAPACKE - cgelss_(m, n, nrhs, to_cptr(a), lda, to_cptr(b), ldb, sOUT, - rcondIN, rankOUT, to_cptr(work), lwork, rwork.ptr(),infoOUT); + cgeev_(jobz, uplo, n, reinterpret_cast(a), lda, + reinterpret_cast(w), + reinterpret_cast(v), ldv, + reinterpret_cast(vr), ldvr, + reinterpret_cast(work), lwork, rwork.ptr(), info); #else - cgelss_(m, n, nrhs, a, lda, b, ldb, sOUT, rcondIN, rankOUT, work, - lwork, rwork.ptr(),infoOUT); + cgeev_(jobz, uplo, n, a, lda, w, v, ldv, vr, ldvr, work, lwork, rwork.ptr(), info, + jobzlen, uplo_len); #endif } -STATIC inline void dgelss_(integer *m, integer *n, integer *nrhs, - double_complex *a, integer *lda, double_complex *b, - integer *ldb, double *sOUT, - double *rcondIN, integer *rankOUT, double_complex *work, - integer *lwork, integer *infoOUT) { - Tensor rwork((5*min(*m,*n))); +STATIC inline void geev_(const char* jobz, const char* uplo, integer *n, + complex_real8 *a, integer *lda, complex_real8 *w, complex_real8 *w_imag, + complex_real8 *v, integer *ldv, complex_real8 *vr, integer *ldvr, + complex_real8 *work, integer *lwork, integer *info, + char_len jobzlen, char_len uplo_len) { + Tensor rwork(max((integer) 1, (integer) (2* (*n)))); #if MADNESS_LINALG_USE_LAPACKE - zgelss_(m, n, nrhs, to_zptr(a), lda, to_zptr(b), ldb, sOUT, - rcondIN, rankOUT, to_zptr(work), lwork, rwork.ptr(),infoOUT); + zgeev_(jobz, uplo, n, reinterpret_cast(a), lda, + reinterpret_cast(w), + reinterpret_cast(v), ldv, + reinterpret_cast(vr), ldvr, + reinterpret_cast(work), lwork, rwork.ptr(), info); #else - zgelss_(m, n, nrhs, a, lda, b, ldb, sOUT, rcondIN, rankOUT, work, - lwork, rwork.ptr(),infoOUT); + zgeev_(jobz, uplo, n, a, lda, w, v, ldv, vr, ldvr, work, lwork, rwork.ptr(), info, + jobzlen, uplo_len); #endif } -/// These oddly-named wrappers enable the generic sygv/hegv iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic sygv/hegv interface to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline -void dsygv_(integer *itype, const char* jobz, const char* uplo, integer *n, - real4 *a, integer *lda, real4 *b, integer *ldb, - real4 *w, real4 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ) { + +STATIC inline void sygv_(integer *itype, const char* jobz, const char* uplo, integer *n, + real4 *a, integer *lda, real4 *b, integer *ldb, + real4 *w, real4 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ) { #if MADNESS_LINALG_USE_LAPACKE ssygv(itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, info); #else - ssygv_(itype, jobz, uplo, n, - a, lda, b, ldb, w, work, lwork, info, + ssygv_(itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, info, jobzlen,uplo_len); #endif } +STATIC inline void sygv_(integer *itype, const char* jobz, const char* uplo, integer *n, + real8 *a, integer *lda, real8 *b, integer *ldb, + real8 *w, real8 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ) { #if MADNESS_LINALG_USE_LAPACKE -STATIC inline -void dsygv_(integer *itype, const char* jobz, const char* uplo, integer *n, - real8 *a, integer *lda, real8 *b, integer *ldb, - real8 *w, real8 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ) { - dsygv(itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, info); -} + dsygv(itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, info); +#else + dsygv_(itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, info, + jobzlen, uplo_len); #endif +} -STATIC inline -void dsygv_(integer *itype, const char* jobz, const char* uplo, integer *n, - complex_real4 *a, integer *lda, complex_real4 *b, integer *ldb, - real4 *w, complex_real4 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ) { +STATIC inline void sygv_(integer *itype, const char* jobz, const char* uplo, integer *n, + complex_real4 *a, integer *lda, complex_real4 *b, integer *ldb, + real4 *w, complex_real4 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ) { Tensor rwork(max((integer) 1, (integer) (3*(*n)-2))); #if MADNESS_LINALG_USE_LAPACKE chegv_(itype, jobz, uplo, n, to_cptr(a), lda, to_cptr(b), - ldb, w, to_cptr(work), lwork, rwork.ptr(), info); + ldb, w, to_cptr(work), lwork, rwork.ptr(), info); #else - chegv_(itype, jobz, uplo, n, - a, lda, b, ldb, w, work, lwork, rwork.ptr(), info, + chegv_(itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, rwork.ptr(), info, jobzlen, uplo_len); #endif } -STATIC inline -void dsygv_(integer *itype, const char* jobz, const char* uplo, integer *n, - complex_real8 *a, integer *lda, complex_real8 *b, integer *ldb, - real8 *w, complex_real8 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ) { +STATIC inline void sygv_(integer *itype, const char* jobz, const char* uplo, integer *n, + complex_real8 *a, integer *lda, complex_real8 *b, integer *ldb, + real8 *w, complex_real8 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ) { Tensor rwork(max((integer) 1, (integer) (3*(*n)-2))); #if MADNESS_LINALG_USE_LAPACKE zhegv_(itype, jobz, uplo,n, to_zptr(a), lda, to_zptr(b), - ldb, w, to_zptr(work), lwork, rwork.ptr(), info); + ldb, w, to_zptr(work), lwork, rwork.ptr(), info); #else - zhegv_(itype, jobz, uplo, n, - a, lda, b, ldb, w, work, lwork, rwork.ptr(), info, + zhegv_(itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, rwork.ptr(), info, jobzlen, uplo_len); #endif } -/// These oddly-named wrappers enable the generic syev/heev iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic syev/heev interface to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline void dsyev_(const char* jobz, const char* uplo, integer *n, - real4 *a, integer *lda, real4 *w, real4 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ) { + +STATIC inline void syev_(const char* jobz, const char* uplo, integer *n, + real4 *a, integer *lda, real4 *w, real4 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ) { #if MADNESS_LINALG_USE_LAPACKE - ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info); + ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info); #else - ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info, jobzlen, uplo_len ); + ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info, jobzlen, uplo_len); #endif } +STATIC inline void syev_(const char* jobz, const char* uplo, integer *n, + real8 *a, integer *lda, real8 *w, real8 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ) { #if MADNESS_LINALG_USE_LAPACKE -STATIC inline void dsyev_(const char* jobz, const char* uplo, integer *n, - real8 *a, integer *lda, real8 *w, real8 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ) { - dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info); -} + dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info); +#else + dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info, jobzlen, uplo_len); #endif +} -STATIC void dsyev_(const char* jobz, const char* uplo, integer *n, - complex_real4 *a, integer *lda, real4 *w, - complex_real4 *work, integer *lwork, - integer *info, char_len jobzlen, char_len uplo_len ) { +STATIC inline void syev_(const char* jobz, const char* uplo, integer *n, + complex_real4 *a, integer *lda, real4 *w, + complex_real4 *work, integer *lwork, + integer *info, char_len jobzlen, char_len uplo_len ) { Tensor rwork(max((integer) 1, (integer) (3* (*n)-2))); //std::cout << *n << " " << *lda << " " << *lwork < rwork(max((integer) 1, (integer) (3* (*n)-2))); #if MADNESS_LINALG_USE_LAPACKE zheev_(jobz, uplo, n, to_zptr(a), lda, w, to_zptr(work), lwork, rwork.ptr(), info); #else zheev_(jobz, uplo, n, a, lda, w, work, lwork, rwork.ptr(), - info, jobzlen, uplo_len ); + info, jobzlen, uplo_len); #endif } -// bryan edits start -/// These oddly-named wrappers enable the generic geev iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic geqrf interface to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline void dgeev_(const char* jobz, const char* uplo, integer *n, - real4 *a, integer *lda, real4 *w_real, real4 *w_imag, - real4 *v, integer *ldv, real4 *vr, integer *ldvr, - real4 *work, integer *lwork, integer *info, - char_len jobzlen, char_len uplo_len) { + +STATIC inline void geqrf_(integer *m, integer *n, + real4 *a, integer *lda, real4 *tau, + real4 *work, integer *lwork, integer *infoOUT) { + sgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); +} + +STATIC inline void geqrf_(integer *m, integer *n, + real8 *a, integer *lda, real8 *tau, + real8 *work, integer *lwork, integer *infoOUT) { #if MADNESS_LINALG_USE_LAPACKE - sgeev_(jobz, uplo, n, a, lda, w_real, w_imag, v, ldv, vr, ldvr, work, lwork, info ); + dgeqrf_(LAPACK_ROW_MAJOR, m, n, a, lda, tau, infoOUT); #else - sgeev_(jobz, uplo, n, a, lda, w_real, w_imag, v, ldv, vr, ldvr, work, lwork, info, - jobzlen, uplo_len ); + dgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); #endif } +STATIC inline void geqrf_(integer *m, integer *n, + float_complex *a, integer *lda, float_complex *tau, + float_complex *work, integer *lwork, integer *infoOUT) { #if MADNESS_LINALG_USE_LAPACKE -STATIC inline void dgeev_(const char* jobz, const char* uplo, integer *n, - real8 *a, integer *lda, real8 *w_real, real8 *w_imag, - real8 *v, integer *ldv, real8 *vr, integer *ldvr, - real8 *work, integer *lwork, integer *info, - char_len jobzlen, char_len uplo_len) { - dgeev_(jobz, uplo, n, a, lda, w_real, w_imag, v, ldv, vr, ldvr, work, lwork, info ); + cgeqrf_(m, n, to_cptr(a), lda, to_cptr(tau), to_cptr(work), lwork, infoOUT); +#else + cgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); +#endif } + +STATIC inline void geqrf_(integer *m, integer *n, + double_complex *a, integer *lda, double_complex *tau, + double_complex *work, integer *lwork, integer *infoOUT) { +#if MADNESS_LINALG_USE_LAPACKE + zgeqrf_(m, n, to_zptr(a), lda, to_zptr(tau), to_zptr(work), lwork, infoOUT); +#else + zgeqrf_(m, n, a, lda, tau, work, lwork, infoOUT); #endif +} -STATIC inline void dgeev_(const char* jobz, const char* uplo, integer *n, - complex_real4 *a, integer *lda, complex_real4 *w, complex_real4 *w_imag, - complex_real4 *v, integer *ldv, complex_real4 *vr, integer *ldvr, - complex_real4 *work, integer *lwork, integer *info, - char_len jobzlen, char_len uplo_len) { - Tensor rwork(max((integer) 1, (integer) (2* (*n)))); +STATIC inline void geqp3_(integer *m, integer *n, + real4 *a, integer *lda, integer *jpvt, real4 *tau, + real4 *work, integer *lwork, integer *infoOUT){ + sgeqp3_(m, n, a, lda, jpvt, tau, work, lwork, infoOUT); +} + +STATIC inline void geqp3_(integer *m, integer *n, + real8 *a, integer *lda, integer *jpvt, real8 *tau, + real8 *work, integer *lwork, integer *infoOUT){ + dgeqp3_(m, n, a, lda, jpvt, tau, work, lwork, infoOUT); +} + +STATIC inline void geqp3_(integer *m, integer *n, + complex_real4 *a, integer *lda, integer *jpvt, complex_real4 *tau, + complex_real4 *work, integer *lwork, integer *infoOUT){ + Tensor rwork((integer) (2* (*n))); #if MADNESS_LINALG_USE_LAPACKE - cgeev_(jobz, uplo, n, reinterpret_cast(a), lda, - reinterpret_cast(w), - reinterpret_cast(v), ldv, - reinterpret_cast(vr), ldvr, - reinterpret_cast(work), lwork, rwork.ptr(), info ); + cgeqp3_(m, n, to_cptr(a), lda, jpvt, to_cptr(tau), to_cptr(work), lwork, rwork.ptr(), infoOUT); #else - cgeev_(jobz, uplo, n, a, lda, w, v, ldv, vr, ldvr, work, lwork, rwork.ptr(), info, - jobzlen, uplo_len ); + cgeqp3_(m, n, a, lda, jpvt, tau, work, lwork, rwork.ptr(), infoOUT); +#endif +} + +STATIC inline void geqp3_(integer *m, integer *n, + complex_real8 *a, integer *lda, integer *jpvt, complex_real8 *tau, + complex_real8 *work, integer *lwork, integer *infoOUT){ + Tensor rwork((integer) (2* (*n))); +#if MADNESS_LINALG_USE_LAPACKE + zgeqp3_(m, n, to_zptr(a), lda, jpvt, to_zptr(tau), to_zptr(work), lwork, rwork.ptr(), infoOUT); +#else + zgeqp3_(m, n, a, lda, jpvt, tau, work, lwork, rwork.ptr(), infoOUT); #endif } +/// These oddly-named wrappers enable the generic orgqr/unggr interface to get +/// the correct LAPACK routine based upon the argument type. Internal +/// use only. -STATIC inline void dgeev_(const char* jobz, const char* uplo, integer *n, - complex_real8 *a, integer *lda, complex_real8 *w, complex_real8 *w_imag, - complex_real8 *v, integer *ldv, complex_real8 *vr, integer *ldvr, - complex_real8 *work, integer *lwork, integer *info, - char_len jobzlen, char_len uplo_len) { - Tensor rwork(max((integer) 1, (integer) (2* (*n)))); +STATIC inline void orgqr_(integer *m, integer *n, integer *k, + real4 *a, integer *lda, real4 *tau, + real4 *work, integer *lwork, integer *info) { + sorgqr_(m, n, k, a, m, tau, work, lwork, info); +} + +STATIC inline void orgqr_(integer *m, integer *n, integer *k, + real8 *a, integer *lda, real8 *tau, + real8 *work, integer *lwork, integer *info) { + dorgqr_(m, n, k, a, m, tau, work, lwork, info); +} + +STATIC inline void orgqr_(integer *m, integer *n, integer *k, + complex_real4 *a, integer *lda, complex_real4 *tau, + complex_real4 *work, integer *lwork, integer *info) { #if MADNESS_LINALG_USE_LAPACKE - zgeev_(jobz, uplo, n, reinterpret_cast(a), lda, - reinterpret_cast(w), - reinterpret_cast(v), ldv, - reinterpret_cast(vr), ldvr, - reinterpret_cast(work), lwork, rwork.ptr(), info ); + cungqr_(m, n, k, to_cptr(a), lda, + to_cptr(tau), + to_cptr(work), lwork, info); #else - zgeev_(jobz, uplo, n, a, lda, w, v, ldv, vr, ldvr, work, lwork, rwork.ptr(), info, - jobzlen, uplo_len ); + cungqr_(m, n, k, a, m, tau, work, lwork, info); +#endif +} + +STATIC inline void orgqr_(integer *m, integer *n, integer *k, + complex_real8 *a, integer *lda, complex_real8 *tau, + complex_real8 *work, integer *lwork, integer *info) { +#if MADNESS_LINALG_USE_LAPACKE + zungqr_(m,n,k, to_zptr(a), m, + to_zptr(tau), + to_zptr(work), lwork, info); +#else + zungqr_(m, n, k, a, m, tau, work, lwork, info); #endif } -/// These oddly-named wrappers enable the generic ggev iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic cholesky interface to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline void dggev_(const char* jobl, const char* jobr, integer *n, - real4 *a, integer *lda, real4 *b, integer *ldb, - real4 *w_real, real4 *w_imag, real4 *beta, - real4 *vl, integer *ldvl, real4 *vr, integer *ldvr, - real4 *work, integer *lwork, integer *info, - char_len jobzlen, char_len uplo_len) { + +STATIC inline void potrf_(const char * UPLO, integer *n, + real4 *a ,integer *lda , integer *info){ #if MADNESS_LINALG_USE_LAPACKE - sggev_(jobl, jobr, n, a, lda, b, ldb, w_real, w_imag, beta, vl, ldvl, vr, ldvr, work, lwork, info ); + spotrf_(UPLO, n, a, lda, info); #else - sggev_(jobl, jobr, n, a, lda, b, ldb, w_real, w_imag, beta, vl, ldvl, vr, ldvr, work, lwork, info, - jobzlen, uplo_len ); + spotrf_(UPLO, n, a, lda, info, 1); +#endif +} + +STATIC inline void potrf_(const char * UPLO, integer *n, + real8 *a ,integer *lda , integer *info){ +#if MADNESS_LINALG_USE_LAPACKE + dpotrf_(UPLO, n, a, lda, info); +#else + dpotrf_(UPLO, n, a, lda, info, 1); #endif } +STATIC inline void potrf_(const char * UPLO, integer *n, + complex_real4 *a ,integer *lda , integer *info){ #if MADNESS_LINALG_USE_LAPACKE -STATIC inline void dggev_(const char* jobl, const char* jobr, integer *n, - real8 *a, integer *lda, real8 *b, integer *ldb, - real8 *w_real, real8 *w_imag, real8 *beta, - real8 *vl, integer *ldvl, real8 *vr, integer *ldvr, - real8 *work, integer *lwork, integer *info, - char_len jobzlen, char_len uplo_len) { - dggev_(jobl, jobr, n, a, lda, b, ldb, w_real, w_imag, beta, vl, ldvl, vr, ldvr, work, lwork, info ); + cpotrf_(UPLO, n, to_cptr(a), lda, info); +#else + cpotrf_(UPLO, n, a, lda, info, 1); +#endif } +STATIC inline void potrf_(const char * UPLO, integer *n, + complex_real8 *a ,integer *lda , integer *info){ +#if MADNESS_LINALG_USE_LAPACKE + zpotrf_(UPLO, n, to_zptr(a), lda, info); +#else + zpotrf_(UPLO, n, a, lda, info, 1); #endif +} -STATIC inline void dggev_(const char* jobl, const char* jobr, integer *n, - complex_real4 *a, integer *lda, complex_real4 *b, integer *ldb, - complex_real4 *w, complex_real4 *w_imag, complex_real4 *beta, - complex_real4 *vl, integer *ldvl, complex_real4 *vr, integer *ldvr, - complex_real4 *work, integer *lwork, integer *info, - char_len jobzlen, char_len uplo_len) { - Tensor rwork(max((integer) 1, (integer) (2* (*n)))); +/// These oddly-named wrappers enable the generic rr_cholesky interface to get +/// the correct LAPACK routine based upon the argument type. Internal +/// use only. + +STATIC inline void pstrf_(const char * UPLO, integer *n, + real4 *a ,integer* lda, integer *piv, integer* rank, + real4* tol, real4* work , integer *info){ + spstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); +} + +STATIC inline void pstrf_(const char * UPLO, integer *n, + real8 *a ,integer* lda, integer *piv, integer* rank, + real8* tol, real8* work , integer *info){ + dpstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); +} + +STATIC inline void pstrf_(const char * UPLO, integer *n, + complex_real4 *a ,integer* lda, integer *piv, integer* rank, + real4* tol, complex_real4* work , integer *info){ #if MADNESS_LINALG_USE_LAPACKE - cggev_(jobl, jobr, n, reinterpret_cast(a), lda, - reinterpret_cast(b), ldb, - reinterpret_cast(w), - reinterpret_cast(beta), - reinterpret_cast(vl), ldvl, - reinterpret_cast(vr), ldvr, - reinterpret_cast(work), lwork, rwork.ptr(), info ); + cpstrf_(UPLO, n, to_cptr(a), lda, piv, rank, tol, reinterpret_cast(work), info); #else - cggev_(jobl, jobr, n, a, lda, b, ldb, w, beta, vl, ldvl, vr, ldvr, work, lwork, rwork.ptr(), info, - jobzlen, uplo_len ); + cpstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); +#endif +} +STATIC inline void pstrf_(const char * UPLO, integer *n, + complex_real8 *a ,integer* lda, integer *piv, integer* rank, + real8* tol, complex_real8* work , integer *info){ +#if MADNESS_LINALG_USE_LAPACKE + //zpstrf_(UPLO, n, to_zptr(a), lda, piv, rank, tol, to_cptr(work), info); + zpstrf_(UPLO, n, to_zptr(a), lda, piv, rank, tol, reinterpret_cast(work), info); +#else + zpstrf_(UPLO, n, a, lda, piv, rank, tol, work, info); #endif } +/// These oddly-named wrappers enable the generic triangular factor to get +/// the correct LAPACK routine based upon the argument type. Internal +/// use only. -STATIC inline void dggev_(const char* jobl, const char* jobr, integer *n, - complex_real8 *a, integer *lda, complex_real8 *b, integer *ldb, - complex_real8 *w, complex_real8 *w_imag, complex_real8 *beta, - complex_real8 *vl, integer *ldvl, complex_real8 *vr, integer *ldvr, - complex_real8 *work, integer *lwork, integer *info, - char_len jobzlen, char_len uplo_len) { - Tensor rwork(max((integer) 1, (integer) (2* (*n)))); +STATIC inline void getrf_(integer* m, integer* n, real4 *a, integer *lda, + integer* ipiv, integer *info){ + sgetrf_(m, n, a, lda, ipiv, info); +} + +STATIC inline void getrf_(integer* m, integer* n, real8 *a, integer *lda, + integer* ipiv, integer *info){ + dgetrf_(m, n, a, lda, ipiv, info); +} + +STATIC inline void getrf_(integer* m, integer* n, complex_real4 *a, integer *lda, + integer* ipiv, integer *info){ #if MADNESS_LINALG_USE_LAPACKE - zggev_(jobl, jobr, n, reinterpret_cast(a), lda, - reinterpret_cast(b), ldb, - reinterpret_cast(w), - reinterpret_cast(beta), - reinterpret_cast(vl), ldvl, - reinterpret_cast(vr), ldvr, - reinterpret_cast(work), lwork, rwork.ptr(), info ); + cgetrf_(m, n, to_cptr(a), lda, ipiv, info); #else - zggev_(jobl, jobr, n, a, lda, b, ldb, w, beta, vl, ldvl, vr, ldvr, work, lwork, rwork.ptr(), info, - jobzlen, uplo_len ); + cgetrf_(m, n, a, lda, ipiv, info); #endif } -// bryan edits end +STATIC inline void getrf_(integer* m, integer* n, complex_real8 *a, integer *lda, + integer* ipiv, integer *info){ +#if MADNESS_LINALG_USE_LAPACKE + zgetrf_(m, n, to_zptr(a), lda, ipiv, info); +#else + zgetrf_(m, n, a, lda, ipiv, info); +#endif +} -/// These oddly-named wrappers enable the generic orgqr/unggr iterface to get -/// the correct LAPACK routine based upon the argument type. Internal +/// These oddly-named wrappers enable the generic triangular inverse to get +/// the correct LAPACK routine based upon the argument type. Internal /// use only. -STATIC inline void dorgqr_(integer *m, integer *n, integer *k, - real4 *a, integer *lda, real4 *tau, - real4 *work, integer *lwork, integer *info) { - sorgqr_(m, n, k, a, m, tau, work, lwork, info); + +STATIC inline void getri_(integer* n, real4 *a, integer *lda, integer* ipiv, + real4 *work, integer *lwork, integer *info){ + sgetri_(n, a, lda, ipiv, work, lwork, info); } -STATIC void dorgqr_(integer *m, integer *n, integer *k, - complex_real4 *a, integer *lda, complex_real4 *tau, - complex_real4 *work, integer *lwork, integer *info) { +STATIC inline void getri_(integer* n, real8 *a, integer *lda, integer* ipiv, + real8 *work, integer *lwork, integer *info){ + dgetri_(n, a, lda, ipiv, work, lwork, info); +} + +STATIC inline void getri_(integer* n, complex_real4 *a, integer *lda, integer* ipiv, + complex_real4 *work, integer *lwork, integer *info){ #if MADNESS_LINALG_USE_LAPACKE - cungqr_(m, n, k, to_cptr(a), lda, - to_cptr(tau), - to_cptr(work), lwork, info); + cgetri_(n, to_cptr(a), lda, ipiv, to_cptr(work), lwork, info); #else - cungqr_(m, n, k, a, m, tau, work, lwork, info); + cgetri_(n, a, lda, ipiv, work, lwork, info); #endif } -STATIC void dorgqr_(integer *m, integer *n, integer *k, - complex_real8 *a, integer *lda, complex_real8 *tau, - complex_real8 *work, integer *lwork, integer *info) { +STATIC inline void getri_(integer* n, complex_real8 *a, integer *lda, integer* ipiv, + complex_real8 *work, integer *lwork, integer *info){ #if MADNESS_LINALG_USE_LAPACKE - zungqr_(m,n,k, to_zptr(a), m, - to_zptr(tau), - to_zptr(work), lwork, info); + zgetri_(n, to_zptr(a), lda, ipiv, to_zptr(work), lwork, info); #else - zungqr_(m, n, k, a, m, tau, work, lwork, info); + zgetri_(n, a, lda, ipiv, work, lwork, info); #endif } @@ -633,7 +751,7 @@ namespace madness { s = Tensor< typename Tensor::scalar_type >(rmax); U = Tensor(m,rmax); VT = Tensor(rmax,n); - dgesvd_("S","S", &n, &m, A.ptr(), &n, s.ptr(), + gesvd_("S","S", &n, &m, A.ptr(), &n, s.ptr(), VT.ptr(), &n, U.ptr(), &rmax, work.ptr(), &lwork, &info, (char_len) 1, (char_len) 1); @@ -666,7 +784,7 @@ namespace madness { integer info; // calling list is swapped - dgesvd_("O", "S", &n, &m, a.ptr(), &n, s.ptr(), + gesvd_("O", "S", &n, &m, a.ptr(), &n, s.ptr(), VT.ptr(), &n, U.ptr(), &rmax, work.ptr(), &lwork, &info, (char_len) 1, (char_len) 1); @@ -705,7 +823,7 @@ namespace madness { integer info; // note overriding of dgesv for other types above - dgesv_(&n, &nrhs, AT.ptr(), &n, piv.ptr(), x.ptr(), &n, &info); + gesv_(&n, &nrhs, AT.ptr(), &n, piv.ptr(), x.ptr(), &n, &info); mask_info(info); TENSOR_ASSERT((info == 0), "gesv failed", info, &a); @@ -726,13 +844,13 @@ namespace madness { // DGETRF computes an LU factorization of a general M-by-N matrix A // using partial pivoting with row interchanges. - dgetrf_(&n,&n,a.ptr(),&n,ipiv.ptr(),&info); + getrf_(&n,&n,a.ptr(),&n,ipiv.ptr(),&info); // DGETRI computes the inverse of a matrix using the LU factorization // computed by DGETRF. integer lwork=(10*n); Tensor work(lwork); - dgetri_(&n,a.ptr(),&n,ipiv.ptr(),work.ptr(),&lwork,&info); + getri_(&n,a.ptr(),&n,ipiv.ptr(),work.ptr(),&lwork,&info); mask_info(info); TENSOR_ASSERT((info == 0), "inverse failed", info, &a); @@ -805,7 +923,7 @@ namespace madness { scalar_type rrcond = rcond; integer rrank=0; - dgelss_(&m, &n, &nrhs, AT.ptr(), &m, lapack_inout.ptr(), &maxmn, + gelss_(&m, &n, &nrhs, AT.ptr(), &m, lapack_inout.ptr(), &maxmn, s.ptr(), &rrcond, &rrank, work.ptr(), &lwork, &info); mask_info(info); TENSOR_ASSERT(info == 0, "gelss failed", info, &a); @@ -862,14 +980,14 @@ namespace madness { Tensor work(lwork); V = transpose(A); // For Hermitian case e = Tensor::scalar_type>(n); - dsyev_("V", "U", &n, V.ptr(), &n, e.ptr(), work.ptr(), &lwork, &info, + syev_("V", "U", &n, V.ptr(), &n, e.ptr(), work.ptr(), &lwork, &info, (char_len) 1, (char_len) 1); mask_info(info); TENSOR_ASSERT(info == 0, "(s/d)syev/(c/z)heev failed", info, &A); V = transpose(V); } -// bryan edits + /** \brief Real non-symmetric or complex non-Hermitian eigenproblem. A is a real non-symmetric or complex non-Hermitian matrix. Return V and e @@ -901,7 +1019,7 @@ namespace madness { Tensor A_copy = copy(A); Tensor VL(n,n); // Should not be referenced Tensor e_real(n), e_imag(n); - dgeev_("N", "V", &n, A_copy.ptr(), &n, e_real.ptr(), e_imag.ptr(), VL.ptr(), &n, + geev_("N", "V", &n, A_copy.ptr(), &n, e_real.ptr(), e_imag.ptr(), VL.ptr(), &n, VR.ptr(), &n, work.ptr(), &lwork, &info, (char_len) 1, (char_len) 1); mask_info(info); TENSOR_ASSERT(info == 0, "(s/d)geev/(c/z)geev failed", info, &A); @@ -910,7 +1028,6 @@ namespace madness { std::complex my_i(0,1); e = e_real + e_imag * my_i; } -// bryan edits end /** \brief Generalized real-symmetric or complex-Hermitian eigenproblem. @@ -950,7 +1067,7 @@ namespace madness { Tensor b = transpose(B); // For Hermitian case V = transpose(A); // For Hermitian case e = Tensor::scalar_type>(n); - dsygv_(&ity, "V", "U", &n, V.ptr(), &n, b.ptr(), &n, + sygv_(&ity, "V", "U", &n, V.ptr(), &n, b.ptr(), &n, e.ptr(), work.ptr(), &lwork, &info, (char_len) 1, (char_len) 1); mask_info(info); @@ -958,7 +1075,6 @@ namespace madness { V = transpose(V); } - // bryan edit start /** \brief Generalized real-non-symmetric or complex-non-Hermitian eigenproblem. This from the LAPACK documentation @@ -1013,7 +1129,7 @@ namespace madness { Tensor A_copy = copy(A); Tensor VL(n,n); // Should not be referenced Tensor e_real(n), e_imag(n), beta(n); - dggev_("N", "V", &n, + ggev_("N", "V", &n, A_copy.ptr(), &n, B.ptr(), &n, e_real.ptr(), e_imag.ptr(), beta.ptr(), VL.ptr(), &n, VR.ptr(), &n, @@ -1048,7 +1164,7 @@ namespace madness { } VR = transpose(VR); } -// bryan edits stop + /** \brief Compute the Cholesky factorization. Compute the Cholesky factorization of the symmetric positive definite matrix A @@ -1143,9 +1259,9 @@ namespace madness { integer lwork=work.size(); integer info; -// dgeqp3(M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ); +// dgeqp3(M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO); std::cout << jpvt[0] << std::endl; - dgeqp3_(&m, &n, A.ptr(), &m, jpvt.ptr(), tau.ptr(), work.ptr(), + geqp3_(&m, &n, A.ptr(), &m, jpvt.ptr(), tau.ptr(), work.ptr(), &lwork, &info); std::cout << jpvt[0] << std::endl; mask_info(info); @@ -1214,7 +1330,7 @@ namespace madness { integer lwork=work.size(); integer info; - dgeqrf_(&m, &n, A.ptr(), &m, tau.ptr(), work.ptr(), + geqrf_(&m, &n, A.ptr(), &m, tau.ptr(), work.ptr(), &lwork, &info); mask_info(info); TENSOR_ASSERT(info == 0, "dgeqrf_: Lapack failed", info, &A); @@ -1246,7 +1362,7 @@ namespace madness { integer k=std::min(m,n);//tau.size(); // size of tau is tau(min(m,n)) integer q_rows=m; integer q_cols= (m>=n) ? n : m; - dorgqr_(&q_rows, &q_cols, &k, A.ptr(), &q_rows, const_cast(tau.ptr()), + orgqr_(&q_rows, &q_cols, &k, A.ptr(), &q_rows, const_cast(tau.ptr()), work.ptr(), &lwork, &info); A=A(Slice(0,q_cols-1),Slice(0,q_rows-1)); // -- use transpose(A) mask_info(info); @@ -1275,7 +1391,7 @@ namespace madness { integer lwork=64*n; Tensor work(lwork); integer info; - dorgqr_(&m, &n, &k, A.ptr(), &m, const_cast(tau.ptr()), + orgqr_(&m, &n, &k, A.ptr(), &m, const_cast(tau.ptr()), work.ptr(), &lwork, &info); mask_info(info); A=transpose(A); @@ -1533,7 +1649,6 @@ namespace madness { cout << "error in double_complex svd " << test_svd(37,19) << endl; cout << endl; - cout << "error in float gelss " << test_gelss(20,30) << endl; cout << "error in double gelss " << test_gelss(30,20) << endl; cout << "error in float_complex gelss " << test_gelss(23,27) << endl; @@ -1546,7 +1661,6 @@ namespace madness { cout << "error in double_complex syev " << test_syev(21) << endl; cout << endl; - cout << "error in float sygv " << test_sygv(20) << endl; cout << "error in double sygv " << test_sygv(20) << endl; cout << "error in float_complex sygv " << test_sygv(23) << endl; @@ -1567,6 +1681,7 @@ namespace madness { cout << endl; cout << "error in double_complex cholesky " << test_cholesky(22) << endl; cout << endl; + cout << "error in float rr_cholesky " << test_rr_cholesky(22) << endl; cout << endl; cout << "error in double rr_cholesky " << test_rr_cholesky(22) << endl; @@ -1584,6 +1699,8 @@ namespace madness { cout << "error in double inverse " << test_inverse(32) << endl; cout << "error in double inverse " << test_inverse(47) << endl; + cout << "error in double_complex inverse " << test_inverse(32) << endl; + cout << "error in double_complex inverse " << test_inverse(47) << endl; cout << endl; } @@ -1605,135 +1722,131 @@ namespace madness { // versions were happy with the instantiations caused by the test code above template - void svd_result(Tensor& a, Tensor& U, - Tensor::scalar_type >& s, Tensor& VT, Tensor& work); + void svd(const Tensor& a, Tensor& U, + Tensor::scalar_type >& s, Tensor& VT); template - void orgqr(Tensor& A, const Tensor& tau); + void svd(const Tensor& a, Tensor& U, + Tensor::scalar_type >& s, Tensor& VT); + template + void svd(const Tensor& a, Tensor& U, + Tensor::scalar_type >& s, Tensor& VT); template - void svd(const Tensor& a, Tensor& U, - Tensor::scalar_type >& s, Tensor& VT); + void svd(const Tensor& a, Tensor& U, + Tensor::scalar_type >& s, Tensor& VT); template - void svd(const Tensor& a, Tensor& U, - Tensor::scalar_type >& s, Tensor& VT); + void svd_result(Tensor& a, Tensor& U, + Tensor::scalar_type >& s, Tensor& VT, Tensor& work); template void svd_result(Tensor& a, Tensor& U, Tensor::scalar_type >& s, Tensor& VT, Tensor& work); template - void gelss(const Tensor& a, const Tensor& b, double rcond, - Tensor& x, Tensor::scalar_type >& s, - long &rank, Tensor::scalar_type>& sumsq); + void svd_result(Tensor& a, Tensor& U, + Tensor::scalar_type >& s, Tensor& VT, + Tensor& work); template - void syev(const Tensor& A, - Tensor& V, Tensor::scalar_type >& e); - + void svd_result(Tensor& a, Tensor& U, + Tensor::scalar_type >& s, Tensor& VT, + Tensor& work); template - void cholesky(Tensor& A); + void gesv(const Tensor& a, const Tensor& b, Tensor& x); template - void rr_cholesky(Tensor& A, typename Tensor::scalar_type tol, Tensor& piv, int& rank); - + void gesv(const Tensor& a, const Tensor& b, Tensor& x); template Tensor inverse(const Tensor& A); template - void qr(Tensor& A, Tensor& R); + Tensor inverse(const Tensor& A); template - void qr(Tensor& A, Tensor& R); + void gelss(const Tensor& a, const Tensor& b, double rcond, + Tensor& x, Tensor::scalar_type >& s, + long &rank, Tensor::scalar_type>& sumsq); template - void qr(Tensor& A, Tensor& R); + void gelss(const Tensor& a, const Tensor& b, double rcond, + Tensor& x, Tensor::scalar_type >& s, + long &rank, Tensor::scalar_type>& sumsq); template - void qr(Tensor& A, Tensor& R); - - + void syev(const Tensor& A, + Tensor& V, Tensor::scalar_type >& e); template - void lq(Tensor& A, Tensor& L); + void syev(const Tensor& A, + Tensor& V, Tensor::scalar_type >& e); template - void lq_result(Tensor& A, Tensor& R, Tensor& tau, Tensor& work, - bool do_qr); - + void geev(const Tensor& A, Tensor& V, Tensor& e); template - void geqp3(Tensor& A, Tensor& tau, Tensor& jpvt); - -// template -// void triangular_solve(const Tensor& L, Tensor& B, -// const char* side, const char* transa); + void sygv(const Tensor& A, const Tensor& B, int itype, + Tensor& V, Tensor::scalar_type >& e); + template + void sygv(const Tensor& A, const Tensor& B, int itype, + Tensor& V, Tensor::scalar_type >& e); template - void orgqr(Tensor& A, const Tensor& tau); + void ggev(const Tensor& A, Tensor& B, Tensor& V, Tensor& e); template - void svd(const Tensor& a, Tensor& U, - Tensor::scalar_type >& s, Tensor& VT); + void cholesky(Tensor& A); + template + void cholesky(Tensor& A); template - void svd_result(Tensor& a, Tensor& U, - Tensor::scalar_type >& s, Tensor& VT, - Tensor& work); + void rr_cholesky(Tensor& A, typename Tensor::scalar_type tol, Tensor& piv, int& rank); + template + void rr_cholesky(Tensor& A, typename Tensor::scalar_type tol, Tensor& piv, int& rank); template - void svd(const Tensor& a, Tensor& U, - Tensor::scalar_type >& s, Tensor& VT); + void geqp3(Tensor& A, Tensor& tau, Tensor& jpvt); template - void svd_result(Tensor& a, Tensor& U, - Tensor::scalar_type >& s, Tensor& VT, - Tensor& work); + void geqp3(Tensor& A, Tensor& tau, Tensor& jpvt); template - void gelss(const Tensor& a, const Tensor& b, double rcond, - Tensor& x, Tensor::scalar_type >& s, - long &rank, Tensor::scalar_type>& sumsq); + void qr(Tensor& A, Tensor& R); template - void syev(const Tensor& A, - Tensor& V, Tensor::scalar_type >& e); + void qr(Tensor& A, Tensor& R); -// bryan edits start template - void geev(const Tensor& A, Tensor& V, Tensor>& e); + void qr(Tensor& A, Tensor& R); template - void ggev(const Tensor& A, Tensor& B, Tensor& V, Tensor>& e); -// bryan edits end + void qr(Tensor& A, Tensor& R); template - void cholesky(Tensor& A); + void lq(Tensor& A, Tensor& L); template - void rr_cholesky(Tensor& A, typename Tensor::scalar_type tol, Tensor& piv, int& rank); + void lq(Tensor& A, Tensor& L); -// template -// void triangular_solve(const Tensor& L, Tensor& B, -// const char* side, const char* transa); template - void gesv(const Tensor& a, const Tensor& b, Tensor& x); + void lq_result(Tensor& A, Tensor& R, Tensor& tau, Tensor& work, + bool do_qr); template - void gesv(const Tensor& a, const Tensor& b, Tensor& x); + void lq_result(Tensor& A, Tensor& R, Tensor& tau, Tensor& work, + bool do_qr); template - void sygv(const Tensor& A, const Tensor& B, int itype, - Tensor& V, Tensor::scalar_type >& e); + void orgqr(Tensor& A, const Tensor& tau); + template - void sygv(const Tensor& A, const Tensor& B, int itype, - Tensor& V, Tensor::scalar_type >& e); + void orgqr(Tensor& A, const Tensor& tau); template void orgqr(Tensor& A, const Tensor& tau);